diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 500c743f97..c38b7cc1a3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -328,28 +328,6 @@ jobs: env: RUST_BACKTRACE: 1 - # Perform all tests (debug mode) for `wasmtime` with the old x86 backend. - test_x86: - name: Test old x86 backend - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - submodules: true - - uses: ./.github/actions/install-rust - with: - toolchain: stable - - # Install wasm32 targets in order to build various tests throughout the - # repo. - - run: rustup target add wasm32-wasi - - run: rustup target add wasm32-unknown-unknown - - # Run the old x86 backend CI (we will eventually remove this). - - run: ./ci/run-tests.sh --features old-x86-backend --locked - env: - RUST_BACKTRACE: 1 - # Build and test the wasi-nn module. test_wasi_nn: name: Test wasi-nn module diff --git a/Cargo.toml b/Cargo.toml index 08e0c0eb72..d69e3ac062 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,9 +100,6 @@ posix-signals-on-macos = ["wasmtime/posix-signals-on-macos"] # backend is the default now. experimental_x64 = [] -# Use the old x86 backend. -old-x86-backend = ["wasmtime/old-x86-backend"] - [badges] maintenance = { status = "actively-developed" } diff --git a/build.rs b/build.rs index 06f0669cdf..cc6d3e5047 100644 --- a/build.rs +++ b/build.rs @@ -182,11 +182,6 @@ fn write_testsuite_tests( fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { match strategy { "Cranelift" => match (testsuite, testname) { - // Skip all reference types tests on the old backend. The modern - // implementation of reference types uses atomic instructions - // for reference counts on `externref`, but the old backend does not - // implement atomic instructions. - ("reference_types", _) if cfg!(feature = "old-x86-backend") => return true, // No simd support yet for s390x. ("simd", _) if platform_is_s390x() => return true, // No memory64 support yet for s390x. 
diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 5bc6b2c9ca..4397568e5a 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -71,9 +71,6 @@ arm32 = [] # Work-in-progress codegen backend for ARM. # backend is the default now. experimental_x64 = [] -# Make the old x86 backend the default. -old-x86-backend = [] - # Option to enable all architectures. all-arch = [ "x86", diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs deleted file mode 100644 index 2f222defb5..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ /dev/null @@ -1,2731 +0,0 @@ -#![allow(non_snake_case)] - -use cranelift_codegen_shared::condcodes::IntCC; -use std::collections::HashMap; - -use crate::cdsl::encodings::{Encoding, EncodingBuilder}; -use crate::cdsl::instructions::{ - vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate, - InstructionPredicateNode, InstructionPredicateRegistry, -}; -use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes}; -use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber}; -use crate::cdsl::types::{LaneType, ValueType}; -use crate::shared::types::Bool::{B1, B16, B32, B64, B8}; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::types::Reference::{R32, R64}; -use crate::shared::Definitions as SharedDefinitions; - -use crate::isa::x86::opcodes::*; - -use super::recipes::{RecipeGroup, Template}; -use crate::cdsl::instructions::BindParameter::Any; - -pub(crate) struct PerCpuModeEncodings { - pub enc32: Vec, - pub enc64: Vec, - pub recipes: Recipes, - recipes_by_name: HashMap, - pub inst_pred_reg: InstructionPredicateRegistry, -} - -impl PerCpuModeEncodings { - fn new() -> Self { - Self { - enc32: Vec::new(), - enc64: Vec::new(), - recipes: Recipes::new(), - recipes_by_name: HashMap::new(), - inst_pred_reg: 
InstructionPredicateRegistry::new(), - } - } - - fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber { - if let Some(found_index) = self.recipes_by_name.get(&recipe.name) { - assert!( - self.recipes[*found_index] == recipe, - "trying to insert different recipes with a same name ({})", - recipe.name - ); - *found_index - } else { - let recipe_name = recipe.name.clone(); - let index = self.recipes.push(recipe); - self.recipes_by_name.insert(recipe_name, index); - index - } - } - - fn make_encoding( - &mut self, - inst: InstSpec, - template: Template, - builder_closure: T, - ) -> Encoding - where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let (recipe, bits) = template.build(); - let recipe_number = self.add_recipe(recipe); - let builder = EncodingBuilder::new(inst, recipe_number, bits); - builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg) - } - - fn enc32_func(&mut self, inst: impl Into, template: Template, builder_closure: T) - where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let encoding = self.make_encoding(inst.into(), template, builder_closure); - self.enc32.push(encoding); - } - fn enc32(&mut self, inst: impl Into, template: Template) { - self.enc32_func(inst, template, |x| x); - } - fn enc32_isap( - &mut self, - inst: impl Into, - template: Template, - isap: SettingPredicateNumber, - ) { - self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap)); - } - fn enc32_instp( - &mut self, - inst: impl Into, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp)); - } - fn enc32_rec(&mut self, inst: impl Into, recipe: &EncodingRecipe, bits: u16) { - let recipe_number = self.add_recipe(recipe.clone()); - let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); - let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); - self.enc32.push(encoding); - } - - fn enc64_func(&mut self, 
inst: impl Into, template: Template, builder_closure: T) - where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let encoding = self.make_encoding(inst.into(), template, builder_closure); - self.enc64.push(encoding); - } - fn enc64(&mut self, inst: impl Into, template: Template) { - self.enc64_func(inst, template, |x| x); - } - fn enc64_isap( - &mut self, - inst: impl Into, - template: Template, - isap: SettingPredicateNumber, - ) { - self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap)); - } - fn enc64_instp( - &mut self, - inst: impl Into, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp)); - } - fn enc64_rec(&mut self, inst: impl Into, recipe: &EncodingRecipe, bits: u16) { - let recipe_number = self.add_recipe(recipe.clone()); - let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); - let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); - self.enc64.push(encoding); - } - - /// Adds I32/I64 encodings as appropriate for a typed instruction. - /// The REX prefix is always inferred at runtime. - /// - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX. - /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. - fn enc_i32_i64(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - - // I32 on x86: no REX prefix. - self.enc32(inst.bind(I32), template.infer_rex()); - - // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(I32), template.infer_rex()); - - // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(I64), template.rex().w()); - } - - /// Adds I32/I64 encodings as appropriate for a typed instruction. - /// All variants of REX prefix are explicitly emitted, not inferred. - /// - /// Add encodings for `inst.i32` to X86_32. 
- /// Add encodings for `inst.i32` to X86_64 with and without REX. - /// Add encodings for `inst.i64` to X86_64 with and without REX. - fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - self.enc32(inst.bind(I32), template.nonrex()); - - // REX-less encoding must come after REX encoding so we don't use it by default. - // Otherwise reg-alloc would never use r8 and up. - self.enc64(inst.bind(I32), template.rex()); - self.enc64(inst.bind(I32), template.nonrex()); - self.enc64(inst.bind(I64), template.rex().w()); - } - - /// Adds B32/B64 encodings as appropriate for a typed instruction. - /// The REX prefix is always inferred at runtime. - /// - /// Adds encoding for `inst.b32` to X86_32. - /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX. - /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix. - fn enc_b32_b64(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - - // B32 on x86: no REX prefix. - self.enc32(inst.bind(B32), template.infer_rex()); - - // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(B32), template.infer_rex()); - - // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. - self.enc64(inst.bind(B64), template.rex().w()); - } - - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with a REX prefix. - /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. - fn enc_i32_i64_rex_only(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - self.enc32(inst.bind(I32), template.nonrex()); - self.enc64(inst.bind(I32), template.rex()); - self.enc64(inst.bind(I64), template.rex().w()); - } - - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with and without REX. - /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. 
- fn enc_i32_i64_instp( - &mut self, - inst: &Instruction, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_func(inst.bind(I32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - - // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise - // reg-alloc would never use r8 and up. - self.enc64_func(inst.bind(I32), template.rex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(I32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(I64), template.rex().w(), |builder| { - builder.inst_predicate(instp) - }); - } - - /// Add encodings for `inst.r32` to X86_32. - /// Add encodings for `inst.r32` to X86_64 with and without REX. - /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. - fn enc_r32_r64_instp( - &mut self, - inst: &Instruction, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_func(inst.bind(R32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - - // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise - // reg-alloc would never use r8 and up. - self.enc64_func(inst.bind(R32), template.rex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(R32), template.nonrex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst.bind(R64), template.rex().w(), |builder| { - builder.inst_predicate(instp) - }); - } - - /// Add encodings for `inst.r32` to X86_32. - /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. 
- fn enc_r32_r64_rex_only(&mut self, inst: impl Into, template: Template) { - let inst: InstSpec = inst.into(); - self.enc32(inst.bind(R32), template.nonrex()); - self.enc64(inst.bind(R64), template.rex().w()); - } - - fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { - self.enc32(inst.clone().bind(R32).bind(Any), template.clone()); - - // REX-less encoding must come after REX encoding so we don't use it by - // default. Otherwise reg-alloc would never use r8 and up. - self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(R32).bind(Any), template.clone()); - - if w_bit { - self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w()); - } else { - self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(R64).bind(Any), template); - } - } - - /// Add encodings for `inst` to X86_64 with and without a REX prefix. - fn enc_x86_64(&mut self, inst: impl Into + Clone, template: Template) { - // See above comment about the ordering of rex vs non-rex encodings. - self.enc64(inst.clone(), template.rex()); - self.enc64(inst, template); - } - - /// Add encodings for `inst` to X86_64 with and without a REX prefix. - fn enc_x86_64_instp( - &mut self, - inst: impl Clone + Into, - template: Template, - instp: InstructionPredicateNode, - ) { - // See above comment about the ordering of rex vs non-rex encodings. - self.enc64_func(inst.clone(), template.rex(), |builder| { - builder.inst_predicate(instp.clone()) - }); - self.enc64_func(inst, template, |builder| builder.inst_predicate(instp)); - } - fn enc_x86_64_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: SettingPredicateNumber, - ) { - // See above comment about the ordering of rex vs non-rex encodings. 
- self.enc64_isap(inst.clone(), template.rex(), isap); - self.enc64_isap(inst, template, isap); - } - - /// Add all three encodings for `inst`: - /// - X86_32 - /// - X86_64 with and without the REX prefix. - fn enc_both(&mut self, inst: impl Clone + Into, template: Template) { - self.enc32(inst.clone(), template.clone()); - self.enc_x86_64(inst, template); - } - fn enc_both_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: SettingPredicateNumber, - ) { - self.enc32_isap(inst.clone(), template.clone(), isap); - self.enc_x86_64_isap(inst, template, isap); - } - fn enc_both_instp( - &mut self, - inst: impl Clone + Into, - template: Template, - instp: InstructionPredicateNode, - ) { - self.enc32_instp(inst.clone(), template.clone(), instp.clone()); - self.enc_x86_64_instp(inst, template, instp); - } - - /// Add two encodings for `inst`: - /// - X86_32, no REX prefix, since this is not valid in 32-bit mode. - /// - X86_64, dynamically infer the REX prefix. - fn enc_both_inferred(&mut self, inst: impl Clone + Into, template: Template) { - self.enc32(inst.clone(), template.clone()); - self.enc64(inst, template.infer_rex()); - } - fn enc_both_inferred_maybe_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: Option, - ) { - self.enc32_maybe_isap(inst.clone(), template.clone(), isap); - self.enc64_maybe_isap(inst, template.infer_rex(), isap); - } - - /// Add two encodings for `inst`: - /// - X86_32 - /// - X86_64 with the REX prefix. - fn enc_both_rex_only(&mut self, inst: impl Clone + Into, template: Template) { - self.enc32(inst.clone(), template.clone()); - self.enc64(inst, template.rex()); - } - - /// Add encodings for `inst.i32` to X86_32. - /// Add encodings for `inst.i32` to X86_64 with and without REX. - /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit` - /// argument to determine whether or not to set the REX.W bit. 
- fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { - self.enc32(inst.clone().bind(I32).bind(Any), template.clone()); - - // REX-less encoding must come after REX encoding so we don't use it by - // default. Otherwise reg-alloc would never use r8 and up. - self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(I32).bind(Any), template.clone()); - - if w_bit { - self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w()); - } else { - self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex()); - self.enc64(inst.clone().bind(I64).bind(Any), template); - } - } - - /// Add the same encoding/recipe pairing to both X86_32 and X86_64 - fn enc_32_64_rec( - &mut self, - inst: impl Clone + Into, - recipe: &EncodingRecipe, - bits: u16, - ) { - self.enc32_rec(inst.clone(), recipe, bits); - self.enc64_rec(inst, recipe, bits); - } - - /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened - fn enc_32_64_func( - &mut self, - inst: impl Clone + Into, - template: Template, - builder_closure: T, - ) where - T: FnOnce(EncodingBuilder) -> EncodingBuilder, - { - let encoding = self.make_encoding(inst.into(), template, builder_closure); - self.enc32.push(encoding.clone()); - self.enc64.push(encoding); - } - - /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand - /// binding) has already happened. 
- fn enc_32_64_maybe_isap( - &mut self, - inst: impl Clone + Into, - template: Template, - isap: Option, - ) { - self.enc32_maybe_isap(inst.clone(), template.clone(), isap); - self.enc64_maybe_isap(inst, template, isap); - } - - fn enc32_maybe_isap( - &mut self, - inst: impl Into, - template: Template, - isap: Option, - ) { - match isap { - None => self.enc32(inst, template), - Some(isap) => self.enc32_isap(inst, template, isap), - } - } - - fn enc64_maybe_isap( - &mut self, - inst: impl Into, - template: Template, - isap: Option, - ) { - match isap { - None => self.enc64(inst, template), - Some(isap) => self.enc64_isap(inst, template, isap), - } - } -} - -// Definitions. - -#[inline(never)] -fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let bconst = shared.by_name("bconst"); - let bint = shared.by_name("bint"); - let copy = shared.by_name("copy"); - let copy_special = shared.by_name("copy_special"); - let copy_to_ssa = shared.by_name("copy_to_ssa"); - let get_pinned_reg = shared.by_name("get_pinned_reg"); - let iconst = shared.by_name("iconst"); - let ireduce = shared.by_name("ireduce"); - let regmove = shared.by_name("regmove"); - let sextend = shared.by_name("sextend"); - let set_pinned_reg = shared.by_name("set_pinned_reg"); - let uextend = shared.by_name("uextend"); - let dummy_sarg_t = shared.by_name("dummy_sarg_t"); - - // Shorthands for recipes. 
- let rec_copysp = r.template("copysp"); - let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); - let rec_get_pinned_reg = r.recipe("get_pinned_reg"); - let rec_null = r.recipe("null"); - let rec_pu_id = r.template("pu_id"); - let rec_pu_id_bool = r.template("pu_id_bool"); - let rec_pu_iq = r.template("pu_iq"); - let rec_rmov = r.template("rmov"); - let rec_set_pinned_reg = r.template("set_pinned_reg"); - let rec_u_id = r.template("u_id"); - let rec_u_id_z = r.template("u_id_z"); - let rec_umr = r.template("umr"); - let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa"); - let rec_urm_noflags = r.template("urm_noflags"); - let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); - let rec_dummy_sarg_t = r.recipe("dummy_sarg_t"); - - // The pinned reg is fixed to a certain value entirely user-controlled, so it generates nothing! - e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0); - e.enc_x86_64( - set_pinned_reg.bind(I64), - rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(), - ); - - e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE)); - e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE)); - e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE)); - e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE)); - - // TODO For x86-64, only define REX forms for now, since we can't describe the - // special regunit immediate operands with the current constraint language. 
- for &ty in &[I8, I16, I32] { - e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); - e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); - } - for &ty in &[B8, B16, B32] { - e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); - e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); - } - e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w()); - e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE)); - e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE)); - e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE)); - e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex()); - e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w()); - - // Immediate constants. - e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); - - e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM)); - e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); - - // The 32-bit immediate movl also zero-extends to 64 bits. - let is_unsigned_int32 = - InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0); - - e.enc64_func( - iconst.bind(I64), - rec_pu_id.opcodes(&MOV_IMM).rex(), - |encoding| encoding.inst_predicate(is_unsigned_int32.clone()), - ); - e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| { - encoding.inst_predicate(is_unsigned_int32) - }); - - // Sign-extended 32-bit immediate. - e.enc64( - iconst.bind(I64), - rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(), - ); - - // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix. - e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w()); - - // Bool constants (uses MOV) - for &ty in &[B1, B8, B16, B32] { - e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM)); - } - e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex()); - - // You may expect that i8 encodings would use 0x30 (XORB) to indicate that encodings should be - // on 8-bit operands (f.ex "xor %al, %al"). 
Cranelift currently does not know when it can - // safely drop the 0x66 prefix, so we explicitly select a wider but permissible opcode. - let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm"); - e.enc_both_instp( - iconst.bind(I8), - rec_u_id_z.opcodes(&XOR), - is_zero_int.clone(), - ); - - // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that - // encodings should be on 16-bit operands (f.ex, "xor %ax, %ax"). Cranelift currently does not - // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these - // scenarios, so we explicitly select a wider but permissible opcode. - // - // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't - // an appropriate i16 encoding available. - e.enc_both_instp( - iconst.bind(I16), - rec_u_id_z.opcodes(&XOR), - is_zero_int.clone(), - ); - e.enc_both_instp( - iconst.bind(I32), - rec_u_id_z.opcodes(&XOR), - is_zero_int.clone(), - ); - e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int); - - // Numerical conversions. - - // Reducing an integer is a no-op. - e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); - e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); - e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); - - e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); - e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); - e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); - e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); - e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); - e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); - - // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending - // instructions for %al/%ax/%eax to %ax/%eax/%rax. 
- - // movsbl - e.enc32( - sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), - ); - e.enc64( - sextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(), - ); - e.enc64( - sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), - ); - - // movswl - e.enc32( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD), - ); - e.enc64( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD).rex(), - ); - e.enc64( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD), - ); - - // movsbq - e.enc64( - sextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(), - ); - - // movswq - e.enc64( - sextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(), - ); - - // movslq - e.enc64( - sextend.bind(I64).bind(I32), - rec_urm_noflags.opcodes(&MOVSXD).rex().w(), - ); - - // movzbl - e.enc32( - uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - e.enc64( - uextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - - // movzwl - e.enc32( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - e.enc64( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), - ); - e.enc64( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - - // movzbq, encoded as movzbl because it's equivalent and shorter. 
- e.enc64( - uextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - uextend.bind(I64).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - - // movzwq, encoded as movzwl because it's equivalent and shorter - e.enc64( - uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), - ); - e.enc64( - uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - - // A 32-bit register copy clears the high 32 bits. - e.enc64( - uextend.bind(I64).bind(I32), - rec_umr.opcodes(&MOV_STORE).rex(), - ); - e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE)); - - // Convert bool to int. - // - // This assumes that b1 is represented as an 8-bit low register with the value 0 - // or 1. - // - // Encode movzbq as movzbl, because it's equivalent and shorter. - for &to in &[I8, I16, I32, I64] { - for &from in &[B1, B8] { - e.enc64( - bint.bind(to).bind(from), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - bint.bind(to).bind(from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - if to != I64 { - e.enc32( - bint.bind(to).bind(from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - } - } - } - for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] { - e.enc_both( - bint.bind(*to).bind(*from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - } - - // Copy Special - // For x86-64, only define REX forms for now, since we can't describe the - // special regunit immediate operands with the current constraint language. - e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w()); - e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE)); - - // Copy to SSA. These have to be done with special _rex_only encoders, because the standard - // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account - // the source register, which is specified directly in the instruction. 
- e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only( - copy_to_ssa.bind(I16), - rec_umr_reg_to_ssa.opcodes(&MOV_STORE), - ); - e.enc_both_rex_only( - copy_to_ssa.bind(F64), - rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD), - ); - e.enc_both_rex_only( - copy_to_ssa.bind(F32), - rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD), - ); - - e.enc_32_64_rec(dummy_sarg_t, rec_dummy_sarg_t, 0); -} - -#[inline(never)] -fn define_memory( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let adjust_sp_down = shared.by_name("adjust_sp_down"); - let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm"); - let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm"); - let copy_nop = shared.by_name("copy_nop"); - let fill = shared.by_name("fill"); - let fill_nop = shared.by_name("fill_nop"); - let istore16 = shared.by_name("istore16"); - let istore16_complex = shared.by_name("istore16_complex"); - let istore32 = shared.by_name("istore32"); - let istore32_complex = shared.by_name("istore32_complex"); - let istore8 = shared.by_name("istore8"); - let istore8_complex = shared.by_name("istore8_complex"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let regfill = shared.by_name("regfill"); - let regspill = shared.by_name("regspill"); - let sload16 = shared.by_name("sload16"); - let sload16_complex = shared.by_name("sload16_complex"); - let sload32 = shared.by_name("sload32"); - let sload32_complex = shared.by_name("sload32_complex"); - let sload8 = shared.by_name("sload8"); - let 
sload8_complex = shared.by_name("sload8_complex"); - let spill = shared.by_name("spill"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - let uload16 = shared.by_name("uload16"); - let uload16_complex = shared.by_name("uload16_complex"); - let uload32 = shared.by_name("uload32"); - let uload32_complex = shared.by_name("uload32_complex"); - let uload8 = shared.by_name("uload8"); - let uload8_complex = shared.by_name("uload8_complex"); - let x86_pop = x86.by_name("x86_pop"); - let x86_push = x86.by_name("x86_push"); - - // Shorthands for recipes. - let rec_adjustsp = r.template("adjustsp"); - let rec_adjustsp_ib = r.template("adjustsp_ib"); - let rec_adjustsp_id = r.template("adjustsp_id"); - let rec_ffillnull = r.recipe("ffillnull"); - let rec_fillnull = r.recipe("fillnull"); - let rec_fillSib32 = r.template("fillSib32"); - let rec_ld = r.template("ld"); - let rec_ldDisp32 = r.template("ldDisp32"); - let rec_ldDisp8 = r.template("ldDisp8"); - let rec_ldWithIndex = r.template("ldWithIndex"); - let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); - let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); - let rec_popq = r.template("popq"); - let rec_pushq = r.template("pushq"); - let rec_regfill32 = r.template("regfill32"); - let rec_regspill32 = r.template("regspill32"); - let rec_spillSib32 = r.template("spillSib32"); - let rec_st = r.template("st"); - let rec_stacknull = r.recipe("stacknull"); - let rec_stDisp32 = r.template("stDisp32"); - let rec_stDisp32_abcd = r.template("stDisp32_abcd"); - let rec_stDisp8 = r.template("stDisp8"); - let rec_stDisp8_abcd = r.template("stDisp8_abcd"); - let rec_stWithIndex = r.template("stWithIndex"); - let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); - let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); - let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); - let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); - let 
rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); - let rec_st_abcd = r.template("st_abcd"); - - // Loads and stores. - let is_load_complex_length_two = - InstructionPredicate::new_length_equals(&*formats.load_complex, 2); - - for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] { - e.enc_i32_i64_instp( - load_complex, - recipe.opcodes(&MOV_LOAD), - is_load_complex_length_two.clone(), - ); - e.enc_r32_r64_instp( - load_complex, - recipe.opcodes(&MOV_LOAD), - is_load_complex_length_two.clone(), - ); - e.enc_x86_64_instp( - uload32_complex, - recipe.opcodes(&MOV_LOAD), - is_load_complex_length_two.clone(), - ); - - e.enc64_instp( - sload32_complex, - recipe.opcodes(&MOVSXD).rex().w(), - is_load_complex_length_two.clone(), - ); - - e.enc_i32_i64_instp( - uload16_complex, - recipe.opcodes(&MOVZX_WORD), - is_load_complex_length_two.clone(), - ); - e.enc_i32_i64_instp( - sload16_complex, - recipe.opcodes(&MOVSX_WORD), - is_load_complex_length_two.clone(), - ); - - e.enc_i32_i64_instp( - uload8_complex, - recipe.opcodes(&MOVZX_BYTE), - is_load_complex_length_two.clone(), - ); - - e.enc_i32_i64_instp( - sload8_complex, - recipe.opcodes(&MOVSX_BYTE), - is_load_complex_length_two.clone(), - ); - } - - let is_store_complex_length_three = - InstructionPredicate::new_length_equals(&*formats.store_complex, 3); - - for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] { - e.enc_i32_i64_instp( - store_complex, - recipe.opcodes(&MOV_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_r32_r64_instp( - store_complex, - recipe.opcodes(&MOV_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_x86_64_instp( - istore32_complex, - recipe.opcodes(&MOV_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_both_instp( - istore16_complex.bind(I32), - recipe.opcodes(&MOV_STORE_16), - is_store_complex_length_three.clone(), - ); - e.enc_x86_64_instp( - istore16_complex.bind(I64), - recipe.opcodes(&MOV_STORE_16), - 
is_store_complex_length_three.clone(), - ); - } - - for recipe in &[ - rec_stWithIndex_abcd, - rec_stWithIndexDisp8_abcd, - rec_stWithIndexDisp32_abcd, - ] { - e.enc_both_instp( - istore8_complex.bind(I32), - recipe.opcodes(&MOV_BYTE_STORE), - is_store_complex_length_three.clone(), - ); - e.enc_x86_64_instp( - istore8_complex.bind(I64), - recipe.opcodes(&MOV_BYTE_STORE), - is_store_complex_length_three.clone(), - ); - } - - for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] { - e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); - e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); - e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE)); - e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16)); - } - - // Byte stores are more complicated because the registers they can address - // depends of the presence of a REX prefix. The st*_abcd recipes fall back to - // the corresponding st* recipes when a REX prefix is applied. - - for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] { - e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); - e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); - } - - e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE)); - - // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid - // constraining the permitted registers. - // See MIN_SPILL_SLOT_SIZE which makes this safe. 
- - e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE)); - for &ty in &[I8, I16] { - e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE)); - e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE)); - } - - for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { - e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); - e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); - e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD)); - e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w()); - e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD)); - e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD)); - e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE)); - e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE)); - } - - e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD)); - e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD)); - - // No-op fills, created by late-stage redundant-fill removal. - for &ty in &[I64, I32, I16, I8] { - e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); - e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); - } - e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0); - e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0); - for &ty in &[F64, F32] { - e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0); - e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0); - } - for &ty in &[R64, R32] { - e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); - e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); - } - - // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. 
- - e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD)); - for &ty in &[I8, I16] { - e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD)); - e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD)); - } - - // Push and Pop. - e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG)); - e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG)); - - e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG)); - e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG)); - - // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn - // into a no-op. - // The same encoding is generated for both the 64- and 32-bit architectures. - for &ty in &[I64, I32, I16, I8] { - e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); - e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); - } - for &ty in &[F64, F32] { - e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); - e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); - } - - // Adjust SP down by a dynamic value (or up, with a negative operand). - e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB)); - e.enc64( - adjust_sp_down.bind(I64), - rec_adjustsp.opcodes(&SUB).rex().w(), - ); - - // Adjust SP up by an immediate (or down, with a negative immediate). - e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8)); - e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM)); - e.enc64( - adjust_sp_up_imm, - rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(), - ); - e.enc64( - adjust_sp_up_imm, - rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(), - ); - - // Adjust SP down by an immediate (or up, with a negative immediate). 
- e.enc32( - adjust_sp_down_imm, - rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5), - ); - e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5)); - e.enc64( - adjust_sp_down_imm, - rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(), - ); - e.enc64( - adjust_sp_down_imm, - rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(), - ); -} - -#[inline(never)] -fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { - let shared = &shared_defs.instructions; - - // Shorthands for instructions. - let bitcast = shared.by_name("bitcast"); - let copy = shared.by_name("copy"); - let regmove = shared.by_name("regmove"); - - // Shorthands for recipes. - let rec_frmov = r.template("frmov"); - let rec_frurm = r.template("frurm"); - let rec_furm = r.template("furm"); - let rec_rfumr = r.template("rfumr"); - - // Floating-point moves. - // movd - e.enc_both( - bitcast.bind(F32).bind(I32), - rec_frurm.opcodes(&MOVD_LOAD_XMM), - ); - e.enc_both( - bitcast.bind(I32).bind(F32), - rec_rfumr.opcodes(&MOVD_STORE_XMM), - ); - - // movq - e.enc64( - bitcast.bind(F64).bind(I64), - rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), - ); - e.enc64( - bitcast.bind(I64).bind(F64), - rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(), - ); - - // movaps - e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD)); - e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD)); - - // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit - // immediate operands with the current constraint language. - e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD)); - e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); - - // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit - // immediate operands with the current constraint language. 
- e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD)); - e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); -} - -#[inline(never)] -fn define_fpu_memory( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - - // Shorthands for instructions. - let fill = shared.by_name("fill"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let regfill = shared.by_name("regfill"); - let regspill = shared.by_name("regspill"); - let spill = shared.by_name("spill"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - - // Shorthands for recipes. - let rec_ffillSib32 = r.template("ffillSib32"); - let rec_fld = r.template("fld"); - let rec_fldDisp32 = r.template("fldDisp32"); - let rec_fldDisp8 = r.template("fldDisp8"); - let rec_fldWithIndex = r.template("fldWithIndex"); - let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); - let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); - let rec_fregfill32 = r.template("fregfill32"); - let rec_fregspill32 = r.template("fregspill32"); - let rec_fspillSib32 = r.template("fspillSib32"); - let rec_fst = r.template("fst"); - let rec_fstDisp32 = r.template("fstDisp32"); - let rec_fstDisp8 = r.template("fstDisp8"); - let rec_fstWithIndex = r.template("fstWithIndex"); - let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); - let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); - - // Float loads and stores. 
- e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD)); - e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD)); - e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD)); - - e.enc_both( - load_complex.bind(F32), - rec_fldWithIndex.opcodes(&MOVSS_LOAD), - ); - e.enc_both( - load_complex.bind(F32), - rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD), - ); - e.enc_both( - load_complex.bind(F32), - rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD), - ); - - e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD)); - e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD)); - e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD)); - - e.enc_both( - load_complex.bind(F64), - rec_fldWithIndex.opcodes(&MOVSD_LOAD), - ); - e.enc_both( - load_complex.bind(F64), - rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD), - ); - e.enc_both( - load_complex.bind(F64), - rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD), - ); - - e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE)); - e.enc_both( - store.bind(F32).bind(Any), - rec_fstDisp8.opcodes(&MOVSS_STORE), - ); - e.enc_both( - store.bind(F32).bind(Any), - rec_fstDisp32.opcodes(&MOVSS_STORE), - ); - - e.enc_both( - store_complex.bind(F32), - rec_fstWithIndex.opcodes(&MOVSS_STORE), - ); - e.enc_both( - store_complex.bind(F32), - rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE), - ); - e.enc_both( - store_complex.bind(F32), - rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE), - ); - - e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE)); - e.enc_both( - store.bind(F64).bind(Any), - rec_fstDisp8.opcodes(&MOVSD_STORE), - ); - e.enc_both( - store.bind(F64).bind(Any), - rec_fstDisp32.opcodes(&MOVSD_STORE), - ); - - e.enc_both( - store_complex.bind(F64), - rec_fstWithIndex.opcodes(&MOVSD_STORE), - ); - e.enc_both( - store_complex.bind(F64), - rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE), - ); - e.enc_both( - store_complex.bind(F64), - 
rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE), - ); - - e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD)); - e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD)); - e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD)); - e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD)); - - e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE)); - e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE)); - e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE)); - e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE)); -} - -#[inline(never)] -fn define_fpu_ops( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let ceil = shared.by_name("ceil"); - let f32const = shared.by_name("f32const"); - let f64const = shared.by_name("f64const"); - let fadd = shared.by_name("fadd"); - let fcmp = shared.by_name("fcmp"); - let fcvt_from_sint = shared.by_name("fcvt_from_sint"); - let fdemote = shared.by_name("fdemote"); - let fdiv = shared.by_name("fdiv"); - let ffcmp = shared.by_name("ffcmp"); - let floor = shared.by_name("floor"); - let fmul = shared.by_name("fmul"); - let fpromote = shared.by_name("fpromote"); - let fsub = shared.by_name("fsub"); - let nearest = shared.by_name("nearest"); - let sqrt = shared.by_name("sqrt"); - let trunc = shared.by_name("trunc"); - let x86_cvtt2si = x86.by_name("x86_cvtt2si"); - let x86_fmax = x86.by_name("x86_fmax"); - let x86_fmin = x86.by_name("x86_fmin"); - - // Shorthands for recipes. 
- let rec_f32imm_z = r.template("f32imm_z"); - let rec_f64imm_z = r.template("f64imm_z"); - let rec_fa = r.template("fa"); - let rec_fcmp = r.template("fcmp"); - let rec_fcscc = r.template("fcscc"); - let rec_frurm = r.template("frurm"); - let rec_furm = r.template("furm"); - let rec_furmi_rnd = r.template("furmi_rnd"); - let rec_rfurm = r.template("rfurm"); - - // Predicates shorthands. - let use_sse41 = settings.predicate_by_name("use_sse41"); - - // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for - // 32-bit and 64-bit floats respectively. - let is_zero_32_bit_float = - InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm"); - e.enc32_instp( - f32const, - rec_f32imm_z.opcodes(&XORPS), - is_zero_32_bit_float.clone(), - ); - - let is_zero_64_bit_float = - InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm"); - e.enc32_instp( - f64const, - rec_f64imm_z.opcodes(&XORPD), - is_zero_64_bit_float.clone(), - ); - - e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float); - e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float); - - // cvtsi2ss - e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS)); - - // cvtsi2sd - e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD)); - - // cvtss2sd - e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD)); - - // cvtsd2ss - e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS)); - - // cvttss2si - e.enc_both( - x86_cvtt2si.bind(I32).bind(F32), - rec_rfurm.opcodes(&CVTTSS2SI), - ); - e.enc64( - x86_cvtt2si.bind(I64).bind(F32), - rec_rfurm.opcodes(&CVTTSS2SI).rex().w(), - ); - - // cvttsd2si - e.enc_both( - x86_cvtt2si.bind(I32).bind(F64), - rec_rfurm.opcodes(&CVTTSD2SI), - ); - e.enc64( - x86_cvtt2si.bind(I64).bind(F64), - rec_rfurm.opcodes(&CVTTSD2SI).rex().w(), - ); - - // Exact square roots. 
- e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS)); - e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD)); - - // Rounding. The recipe looks at the opcode to pick an immediate. - for inst in &[nearest, floor, ceil, trunc] { - e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41); - e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41); - } - - // Binary arithmetic ops. - e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS)); - e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD)); - - e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS)); - e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD)); - - e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS)); - e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD)); - - e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS)); - e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD)); - - e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS)); - e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD)); - - e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS)); - e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD)); - - // Comparisons. - // - // This only covers the condition codes in `supported_floatccs`, the rest are - // handled by legalization patterns. - e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS)); - e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD)); - e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS)); - e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD)); -} - -#[inline(never)] -fn define_alu( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - - // Shorthands for instructions. 
- let clz = shared.by_name("clz"); - let ctz = shared.by_name("ctz"); - let icmp = shared.by_name("icmp"); - let icmp_imm = shared.by_name("icmp_imm"); - let ifcmp = shared.by_name("ifcmp"); - let ifcmp_imm = shared.by_name("ifcmp_imm"); - let ifcmp_sp = shared.by_name("ifcmp_sp"); - let ishl = shared.by_name("ishl"); - let ishl_imm = shared.by_name("ishl_imm"); - let popcnt = shared.by_name("popcnt"); - let rotl = shared.by_name("rotl"); - let rotl_imm = shared.by_name("rotl_imm"); - let rotr = shared.by_name("rotr"); - let rotr_imm = shared.by_name("rotr_imm"); - let selectif = shared.by_name("selectif"); - let selectif_spectre_guard = shared.by_name("selectif_spectre_guard"); - let sshr = shared.by_name("sshr"); - let sshr_imm = shared.by_name("sshr_imm"); - let trueff = shared.by_name("trueff"); - let trueif = shared.by_name("trueif"); - let ushr = shared.by_name("ushr"); - let ushr_imm = shared.by_name("ushr_imm"); - let x86_bsf = x86.by_name("x86_bsf"); - let x86_bsr = x86.by_name("x86_bsr"); - - // Shorthands for recipes. - let rec_bsf_and_bsr = r.template("bsf_and_bsr"); - let rec_cmov = r.template("cmov"); - let rec_icscc = r.template("icscc"); - let rec_icscc_ib = r.template("icscc_ib"); - let rec_icscc_id = r.template("icscc_id"); - let rec_rcmp = r.template("rcmp"); - let rec_rcmp_ib = r.template("rcmp_ib"); - let rec_rcmp_id = r.template("rcmp_id"); - let rec_rcmp_sp = r.template("rcmp_sp"); - let rec_rc = r.template("rc"); - let rec_setf_abcd = r.template("setf_abcd"); - let rec_seti_abcd = r.template("seti_abcd"); - let rec_urm = r.template("urm"); - - // Predicates shorthands. 
- let use_popcnt = settings.predicate_by_name("use_popcnt"); - let use_lzcnt = settings.predicate_by_name("use_lzcnt"); - let use_bmi1 = settings.predicate_by_name("use_bmi1"); - - let band = shared.by_name("band"); - let band_imm = shared.by_name("band_imm"); - let band_not = shared.by_name("band_not"); - let bnot = shared.by_name("bnot"); - let bor = shared.by_name("bor"); - let bor_imm = shared.by_name("bor_imm"); - let bxor = shared.by_name("bxor"); - let bxor_imm = shared.by_name("bxor_imm"); - let iadd = shared.by_name("iadd"); - let iadd_ifcarry = shared.by_name("iadd_ifcarry"); - let iadd_ifcin = shared.by_name("iadd_ifcin"); - let iadd_ifcout = shared.by_name("iadd_ifcout"); - let iadd_imm = shared.by_name("iadd_imm"); - let imul = shared.by_name("imul"); - let isub = shared.by_name("isub"); - let isub_ifbin = shared.by_name("isub_ifbin"); - let isub_ifborrow = shared.by_name("isub_ifborrow"); - let isub_ifbout = shared.by_name("isub_ifbout"); - let x86_sdivmodx = x86.by_name("x86_sdivmodx"); - let x86_smulx = x86.by_name("x86_smulx"); - let x86_udivmodx = x86.by_name("x86_udivmodx"); - let x86_umulx = x86.by_name("x86_umulx"); - - let rec_div = r.template("div"); - let rec_fa = r.template("fa"); - let rec_fax = r.template("fax"); - let rec_mulx = r.template("mulx"); - let rec_r_ib = r.template("r_ib"); - let rec_r_id = r.template("r_id"); - let rec_rin = r.template("rin"); - let rec_rio = r.template("rio"); - let rec_rout = r.template("rout"); - let rec_rr = r.template("rr"); - let rec_rrx = r.template("rrx"); - let rec_ur = r.template("ur"); - - e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD)); - e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD)); - e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC)); - e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC)); - e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0)); - e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0)); - - e.enc_i32_i64(isub, rec_rr.opcodes(&SUB)); - e.enc_i32_i64(isub_ifbout, 
rec_rout.opcodes(&SUB)); - e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB)); - e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB)); - - e.enc_i32_i64(band, rec_rr.opcodes(&AND)); - e.enc_b32_b64(band, rec_rr.opcodes(&AND)); - - // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can - // even use the single-byte immediate for 0xffff_ffXX masks. - - e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4)); - e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4)); - - e.enc_i32_i64(bor, rec_rr.opcodes(&OR)); - e.enc_b32_b64(bor, rec_rr.opcodes(&OR)); - e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1)); - e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1)); - - e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR)); - e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR)); - e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6)); - e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6)); - - // x86 has a bitwise not instruction NOT. - e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2)); - e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2)); - e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2)); - - // Also add a `b1` encodings for the logic instructions. - // TODO: Should this be done with 8-bit instructions? It would improve partial register - // dependencies. - e.enc_both(band.bind(B1), rec_rr.opcodes(&AND)); - e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR)); - e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR)); - - e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL)); - e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7)); - e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6)); - - e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5)); - e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4)); - - // Binary bitwise ops. - // - // The F64 version is intentionally encoded using the single-precision opcode: - // the operation is identical and the encoding is one byte shorter. 
- e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS)); - e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS)); - - e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS)); - e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS)); - - e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS)); - e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS)); - - // The `andnps(x,y)` instruction computes `~x&y`, while band_not(x,y)` is `x&~y. - e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS)); - e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS)); - - // Shifts and rotates. - // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit - // and 16-bit shifts would need explicit masking. - - for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] { - // Cannot use enc_i32_i64 for this pattern because instructions require - // to bind any. - e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr)); - e.enc32( - inst.bind(I32).bind(I16), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - e.enc32( - inst.bind(I32).bind(I32), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - e.enc64( - inst.bind(I64).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(), - ); - e.enc64( - inst.bind(I32).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(), - ); - e.enc64( - inst.bind(I32).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - } - - e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0)); - e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1)); - e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4)); - e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5)); - e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7)); - - // Population count. 
- e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); - e.enc64_isap( - popcnt.bind(I64), - rec_urm.opcodes(&POPCNT).rex().w(), - use_popcnt, - ); - e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt); - e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); - - // Count leading zero bits. - e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); - e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt); - e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt); - e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); - - // Count trailing zero bits. - e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); - e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1); - e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1); - e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); - - // Bit scan forwards and reverse - e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD)); - e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE)); - - // Comparisons - e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG)); - e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7)); - e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7)); - e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG)); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7)); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7)); - // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). - - e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG)); - e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w()); - - // Convert flags to bool. - // This encodes `b1` as an 8-bit low register with the value 0 or 1. - e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); - e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); - - // Conditional move (a.k.a integer select). 
- e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW)); - // A Spectre-guard integer select is exactly the same as a selectif, but - // is not associated with any other legalization rules and is not - // recognized by any optimizations, so it must arrive here unmodified - // and in its original place. - e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW)); -} - -#[inline(never)] -#[allow(clippy::cognitive_complexity)] -fn define_simd( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let avg_round = shared.by_name("avg_round"); - let bitcast = shared.by_name("bitcast"); - let bor = shared.by_name("bor"); - let bxor = shared.by_name("bxor"); - let copy = shared.by_name("copy"); - let copy_nop = shared.by_name("copy_nop"); - let copy_to_ssa = shared.by_name("copy_to_ssa"); - let fadd = shared.by_name("fadd"); - let fcmp = shared.by_name("fcmp"); - let fcvt_from_sint = shared.by_name("fcvt_from_sint"); - let fdiv = shared.by_name("fdiv"); - let fill = shared.by_name("fill"); - let fill_nop = shared.by_name("fill_nop"); - let fmul = shared.by_name("fmul"); - let fsub = shared.by_name("fsub"); - let iabs = shared.by_name("iabs"); - let iadd = shared.by_name("iadd"); - let icmp = shared.by_name("icmp"); - let imul = shared.by_name("imul"); - let ishl_imm = shared.by_name("ishl_imm"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let raw_bitcast = shared.by_name("raw_bitcast"); - let regfill = shared.by_name("regfill"); - let regmove = shared.by_name("regmove"); - let regspill = shared.by_name("regspill"); - let sadd_sat = shared.by_name("sadd_sat"); - let scalar_to_vector = shared.by_name("scalar_to_vector"); - let sload8x8 = shared.by_name("sload8x8"); - let sload8x8_complex = 
shared.by_name("sload8x8_complex"); - let sload16x4 = shared.by_name("sload16x4"); - let sload16x4_complex = shared.by_name("sload16x4_complex"); - let sload32x2 = shared.by_name("sload32x2"); - let sload32x2_complex = shared.by_name("sload32x2_complex"); - let spill = shared.by_name("spill"); - let sqrt = shared.by_name("sqrt"); - let sshr_imm = shared.by_name("sshr_imm"); - let ssub_sat = shared.by_name("ssub_sat"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - let swiden_low = shared.by_name("swiden_low"); - let uadd_sat = shared.by_name("uadd_sat"); - let uload8x8 = shared.by_name("uload8x8"); - let uload8x8_complex = shared.by_name("uload8x8_complex"); - let uload16x4 = shared.by_name("uload16x4"); - let uload16x4_complex = shared.by_name("uload16x4_complex"); - let uload32x2 = shared.by_name("uload32x2"); - let uload32x2_complex = shared.by_name("uload32x2_complex"); - let snarrow = shared.by_name("snarrow"); - let unarrow = shared.by_name("unarrow"); - let uwiden_low = shared.by_name("uwiden_low"); - let ushr_imm = shared.by_name("ushr_imm"); - let usub_sat = shared.by_name("usub_sat"); - let vconst = shared.by_name("vconst"); - let vselect = shared.by_name("vselect"); - let widening_pairwise_dot_product_s = shared.by_name("widening_pairwise_dot_product_s"); - let x86_cvtt2si = x86.by_name("x86_cvtt2si"); - let x86_insertps = x86.by_name("x86_insertps"); - let x86_fmax = x86.by_name("x86_fmax"); - let x86_fmin = x86.by_name("x86_fmin"); - let x86_movlhps = x86.by_name("x86_movlhps"); - let x86_movsd = x86.by_name("x86_movsd"); - let x86_pblendw = x86.by_name("x86_pblendw"); - let x86_pextr = x86.by_name("x86_pextr"); - let x86_pinsr = x86.by_name("x86_pinsr"); - let x86_pmaxs = x86.by_name("x86_pmaxs"); - let x86_pmaxu = x86.by_name("x86_pmaxu"); - let x86_pmins = x86.by_name("x86_pmins"); - let x86_pminu = x86.by_name("x86_pminu"); - let x86_pmullq = x86.by_name("x86_pmullq"); - let x86_pmuludq = 
x86.by_name("x86_pmuludq"); - let x86_palignr = x86.by_name("x86_palignr"); - let x86_pshufb = x86.by_name("x86_pshufb"); - let x86_pshufd = x86.by_name("x86_pshufd"); - let x86_psll = x86.by_name("x86_psll"); - let x86_psra = x86.by_name("x86_psra"); - let x86_psrl = x86.by_name("x86_psrl"); - let x86_ptest = x86.by_name("x86_ptest"); - let x86_punpckh = x86.by_name("x86_punpckh"); - let x86_punpckl = x86.by_name("x86_punpckl"); - let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps"); - - // Shorthands for recipes. - let rec_blend = r.template("blend"); - let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128"); - let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128"); - let rec_f_ib = r.template("f_ib"); - let rec_fa = r.template("fa"); - let rec_fa_ib = r.template("fa_ib"); - let rec_fax = r.template("fax"); - let rec_fcmp = r.template("fcmp"); - let rec_ffillSib32 = r.template("ffillSib32"); - let rec_ffillnull = r.recipe("ffillnull"); - let rec_fld = r.template("fld"); - let rec_fldDisp32 = r.template("fldDisp32"); - let rec_fldDisp8 = r.template("fldDisp8"); - let rec_fldWithIndex = r.template("fldWithIndex"); - let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); - let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); - let rec_fregfill32 = r.template("fregfill32"); - let rec_fregspill32 = r.template("fregspill32"); - let rec_frmov = r.template("frmov"); - let rec_frurm = r.template("frurm"); - let rec_fspillSib32 = r.template("fspillSib32"); - let rec_fst = r.template("fst"); - let rec_fstDisp32 = r.template("fstDisp32"); - let rec_fstDisp8 = r.template("fstDisp8"); - let rec_fstWithIndex = r.template("fstWithIndex"); - let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); - let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); - let rec_furm = r.template("furm"); - let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); - let rec_icscc_fpr = r.template("icscc_fpr"); - let rec_null_fpr = r.recipe("null_fpr"); - let 
rec_pfcmp = r.template("pfcmp"); - let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr"); - let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr"); - let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r"); - let rec_stacknull = r.recipe("stacknull"); - let rec_vconst = r.template("vconst"); - let rec_vconst_optimized = r.template("vconst_optimized"); - - // Predicates shorthands. - settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); - settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); - let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd"); - let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); - let use_sse42_simd = settings.predicate_by_name("use_sse42_simd"); - let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd"); - let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd"); - - // SIMD vector size: eventually multiple vector sizes may be supported but for now only - // SSE-sized vectors are available. - let sse_vector_size: u64 = 128; - - // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see - // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the - // value across the register. - - let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; - - // PSHUFB, 8-bit shuffle using two XMM registers. - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = x86_pshufb.bind(vector(ty, sse_vector_size)); - let template = rec_fa.opcodes(&PSHUFB); - e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd)); - } - - // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate. 
- for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { - let instruction = x86_pshufd.bind(vector(ty, sse_vector_size)); - let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD); - e.enc_both_inferred(instruction, template); - } - - // SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be - // either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB; - // for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let opcode = match ty.lane_bits() { - 32 => &BLENDVPS, - 64 => &BLENDVPD, - _ => &PBLENDVB, - }; - let instruction = vselect.bind(vector(ty, sse_vector_size)); - let template = rec_blend.opcodes(opcode); - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } - - // PBLENDW, select lanes using a u8 immediate. - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { - let instruction = x86_pblendw.bind(vector(ty, sse_vector_size)); - let template = rec_fa_ib.opcodes(&PBLENDW); - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } - - // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according - // to the Intel manual: "When the destination operand is an XMM register, the source operand is - // written to the low doubleword of the register and the register is zero-extended to 128 bits." - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size)); - if ty.is_float() { - // No need to move floats--they already live in XMM registers. - e.enc_32_64_rec(instruction, rec_null_fpr, 0); - } else { - let template = rec_frurm.opcodes(&MOVD_LOAD_XMM); - if ty.lane_bits() < 64 { - e.enc_both_inferred(instruction, template); - } else { - // No 32-bit encodings for 64-bit widths. 
- assert_eq!(ty.lane_bits(), 64); - e.enc64(instruction, template.rex().w()); - } - } - } - - // SIMD insertlane - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let (opcode, isap): (&[_], _) = match ty.lane_bits() { - 8 => (&PINSRB, Some(use_sse41_simd)), - 16 => (&PINSRW, None), - 32 | 64 => (&PINSR, Some(use_sse41_simd)), - _ => panic!("invalid size for SIMD insertlane"), - }; - - let instruction = x86_pinsr.bind(vector(ty, sse_vector_size)); - let template = rec_r_ib_unsigned_r.opcodes(opcode); - if ty.lane_bits() < 64 { - e.enc_both_inferred_maybe_isap(instruction, template, isap); - } else { - // It turns out the 64-bit widths have REX/W encodings and only are available on - // x86_64. - e.enc64_maybe_isap(instruction, template.rex().w(), isap); - } - } - - // For legalizing insertlane with floats, INSERTPS from SSE4.1. - { - let instruction = x86_insertps.bind(vector(F32, sse_vector_size)); - let template = rec_fa_ib.opcodes(&INSERTPS); - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } - - // For legalizing insertlane with floats, MOVSD from SSE2. - { - let instruction = x86_movsd.bind(vector(F64, sse_vector_size)); - let template = rec_fa.opcodes(&MOVSD_LOAD); - e.enc_both_inferred(instruction, template); // from SSE2 - } - - // For legalizing insertlane with floats, MOVLHPS from SSE. 
- { - let instruction = x86_movlhps.bind(vector(F64, sse_vector_size)); - let template = rec_fa.opcodes(&MOVLHPS); - e.enc_both_inferred(instruction, template); // from SSE - } - - // SIMD extractlane - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let opcode = match ty.lane_bits() { - 8 => &PEXTRB, - 16 => &PEXTRW, - 32 | 64 => &PEXTR, - _ => panic!("invalid size for SIMD extractlane"), - }; - - let instruction = x86_pextr.bind(vector(ty, sse_vector_size)); - let template = rec_r_ib_unsigned_gpr.opcodes(opcode); - if ty.lane_bits() < 64 { - e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); - } else { - // It turns out the 64-bit widths have REX/W encodings and only are available on - // x86_64. - e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd)); - } - } - - // SIMD packing/unpacking - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let (high, low) = match ty.lane_bits() { - 8 => (&PUNPCKHBW, &PUNPCKLBW), - 16 => (&PUNPCKHWD, &PUNPCKLWD), - 32 => (&PUNPCKHDQ, &PUNPCKLDQ), - 64 => (&PUNPCKHQDQ, &PUNPCKLQDQ), - _ => panic!("invalid size for SIMD packing/unpacking"), - }; - - e.enc_both_inferred( - x86_punpckh.bind(vector(ty, sse_vector_size)), - rec_fa.opcodes(high), - ); - e.enc_both_inferred( - x86_punpckl.bind(vector(ty, sse_vector_size)), - rec_fa.opcodes(low), - ); - } - - // SIMD narrow/widen - for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] { - let snarrow = snarrow.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes)); - } - for (ty, opcodes, isap) in &[ - (I16, &PACKUSWB[..], None), - (I32, &PACKUSDW[..], Some(use_sse41_simd)), - ] { - let unarrow = unarrow.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap); - } - for (ty, swiden_opcode, uwiden_opcode) in &[ - (I8, &PMOVSXBW[..], &PMOVZXBW[..]), - (I16, &PMOVSXWD[..], &PMOVZXWD[..]), - ] { - let isap = 
Some(use_sse41_simd); - let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap); - let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap); - } - for ty in &[I8, I16, I32, I64] { - e.enc_both_inferred_maybe_isap( - x86_palignr.bind(vector(*ty, sse_vector_size)), - rec_fa_ib.opcodes(&PALIGNR[..]), - Some(use_ssse3_simd), - ); - } - - // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8). - for from_type in ValueType::all_lane_types().filter(allowed_simd_type) { - for to_type in - ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type) - { - let instruction = raw_bitcast - .bind(vector(to_type, sse_vector_size)) - .bind(vector(from_type, sse_vector_size)); - e.enc_32_64_rec(instruction, rec_null_fpr, 0); - } - } - - // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an - // XMM register. 
- for float_type in &[F32, F64] { - for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) { - e.enc_32_64_rec( - raw_bitcast - .bind(vector(lane_type, sse_vector_size)) - .bind(*float_type), - rec_null_fpr, - 0, - ); - e.enc_32_64_rec( - raw_bitcast - .bind(*float_type) - .bind(vector(lane_type, sse_vector_size)), - rec_null_fpr, - 0, - ); - } - } - - // SIMD conversions - { - let fcvt_from_sint_32 = fcvt_from_sint - .bind(vector(F32, sse_vector_size)) - .bind(vector(I32, sse_vector_size)); - e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS)); - - e.enc_32_64_maybe_isap( - x86_vcvtudq2ps, - rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS), - Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F - ); - - e.enc_both_inferred( - x86_cvtt2si - .bind(vector(I32, sse_vector_size)) - .bind(vector(F32, sse_vector_size)), - rec_furm.opcodes(&CVTTPS2DQ), - ); - } - - // SIMD vconst for special cases (all zeroes, all ones) - // this must be encoded prior to the MOVUPS implementation (below) so the compiler sees this - // encoding first - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = vconst.bind(vector(ty, sse_vector_size)); - - let is_zero_128bit = - InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle"); - let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex(); - e.enc_32_64_func(instruction.clone(), template, |builder| { - builder.inst_predicate(is_zero_128bit) - }); - - let is_ones_128bit = - InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle"); - let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex(); - e.enc_32_64_func(instruction, template, |builder| { - builder.inst_predicate(is_ones_128bit) - }); - } - - // SIMD vconst using MOVUPS - // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have - // to guarantee that the constants are aligned when emitted and there is currently no 
mechanism - // for that; alternately, constants could be loaded into XMM registers using a sequence like: - // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored - // in memory) but some performance measurements are needed. - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let instruction = vconst.bind(vector(ty, sse_vector_size)); - let template = rec_vconst.opcodes(&MOVUPS_LOAD); - e.enc_both_inferred(instruction, template); // from SSE - } - - // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of - // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have - // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124). - // Also, it would be ideal to infer REX prefixes for all of these instructions but for the - // time being only instructions with common recipes have `infer_rex()` support. - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - // Store - let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any); - e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE)); - e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE)); - e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE)); - - // Store complex - let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size)); - e.enc_both( - bound_store_complex.clone(), - rec_fstWithIndex.opcodes(&MOVUPS_STORE), - ); - e.enc_both( - bound_store_complex.clone(), - rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE), - ); - e.enc_both( - bound_store_complex, - rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE), - ); - - // Load - let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any); - e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD)); - e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); - 
e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); - - // Load complex - let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size)); - e.enc_both( - bound_load_complex.clone(), - rec_fldWithIndex.opcodes(&MOVUPS_LOAD), - ); - e.enc_both( - bound_load_complex.clone(), - rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD), - ); - e.enc_both( - bound_load_complex, - rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD), - ); - - // Spill - let bound_spill = spill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE)); - let bound_regspill = regspill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE)); - - // Fill - let bound_fill = fill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD)); - let bound_regfill = regfill.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD)); - let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size)); - e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0); - - // Regmove - let bound_regmove = regmove.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD)); - - // Copy - let bound_copy = copy.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD)); - let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size)); - e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD)); - let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size)); - e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0); - } - - // SIMD load extend - for (inst, opcodes) in &[ - (uload8x8, &PMOVZXBW), - (uload16x4, &PMOVZXWD), - (uload32x2, &PMOVZXDQ), - (sload8x8, &PMOVSXBW), - (sload16x4, &PMOVSXWD), - (sload32x2, &PMOVSXDQ), - ] { - let isap = Some(use_sse41_simd); - for recipe in &[rec_fld, rec_fldDisp8, rec_fldDisp32] { - let inst = *inst; - let template = 
recipe.opcodes(*opcodes); - e.enc_both_inferred_maybe_isap(inst.clone().bind(I32), template.clone(), isap); - e.enc64_maybe_isap(inst.bind(I64), template.infer_rex(), isap); - } - } - - // SIMD load extend (complex addressing) - let is_load_complex_length_two = - InstructionPredicate::new_length_equals(&*formats.load_complex, 2); - for (inst, opcodes) in &[ - (uload8x8_complex, &PMOVZXBW), - (uload16x4_complex, &PMOVZXWD), - (uload32x2_complex, &PMOVZXDQ), - (sload8x8_complex, &PMOVSXBW), - (sload16x4_complex, &PMOVSXWD), - (sload32x2_complex, &PMOVSXDQ), - ] { - for recipe in &[ - rec_fldWithIndex, - rec_fldWithIndexDisp8, - rec_fldWithIndexDisp32, - ] { - let template = recipe.opcodes(*opcodes); - let predicate = |encoding: EncodingBuilder| { - encoding - .isa_predicate(use_sse41_simd) - .inst_predicate(is_load_complex_length_two.clone()) - }; - e.enc32_func(inst.clone(), template.clone(), predicate); - // No infer_rex calculator for these recipes; place REX version first as in enc_x86_64. 
- e.enc64_func(inst.clone(), template.rex(), predicate); - e.enc64_func(inst.clone(), template, predicate); - } - } - - // SIMD integer addition - for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] { - let iadd = iadd.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes)); - } - - // SIMD integer saturating addition - e.enc_both_inferred( - sadd_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PADDSB), - ); - e.enc_both_inferred( - sadd_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PADDSW), - ); - e.enc_both_inferred( - uadd_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PADDUSB), - ); - e.enc_both_inferred( - uadd_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PADDUSW), - ); - - // SIMD integer subtraction - let isub = shared.by_name("isub"); - for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] { - let isub = isub.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes)); - } - - // SIMD integer saturating subtraction - e.enc_both_inferred( - ssub_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PSUBSB), - ); - e.enc_both_inferred( - ssub_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PSUBSW), - ); - e.enc_both_inferred( - usub_sat.bind(vector(I8, sse_vector_size)), - rec_fa.opcodes(&PSUBUSB), - ); - e.enc_both_inferred( - usub_sat.bind(vector(I16, sse_vector_size)), - rec_fa.opcodes(&PSUBUSW), - ); - - // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16 - // and I64x2 and these are (at the time of writing) not necessary for WASM SIMD. - for (ty, opcodes, isap) in &[ - (I16, &PMULLW[..], None), - (I32, &PMULLD[..], Some(use_sse41_simd)), - ] { - let imul = imul.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap); - } - - // SIMD multiplication with lane expansion. 
- e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ)); - - // SIMD multiplication and add adjacent pairs, from SSE2. - e.enc_both_inferred(widening_pairwise_dot_product_s, rec_fa.opcodes(&PMADDWD)); - - // SIMD integer multiplication for I64x2 using a AVX512. - { - e.enc_32_64_maybe_isap( - x86_pmullq, - rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(), - Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL - ); - } - - // SIMD integer average with rounding. - for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] { - let avgr = avg_round.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes)); - } - - // SIMD integer absolute value. - for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] { - let iabs = iabs.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd)); - } - - // SIMD logical operations - let band = shared.by_name("band"); - let band_not = shared.by_name("band_not"); - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - // and - let band = band.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(band, rec_fa.opcodes(&PAND)); - - // and not (note flipped recipe operands to match band_not order) - let band_not = band_not.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN)); - - // or - let bor = bor.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(bor, rec_fa.opcodes(&POR)); - - // xor - let bxor = bxor.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR)); - - // ptest - let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd)); - } - - // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement - // allows SIMD shifts to be legalized more easily. 
TODO ideally this would be typed as an - // I128x1 but restrictions on the type builder prevent this; the general idea here is that - // the upper bits are all zeroed and do not form parts of any separate lane. See - // https://github.com/bytecodealliance/wasmtime/issues/1140. - e.enc_both_inferred( - bitcast.bind(vector(I64, sse_vector_size)).bind(I32), - rec_frurm.opcodes(&MOVD_LOAD_XMM), - ); - e.enc64( - bitcast.bind(vector(I64, sse_vector_size)).bind(I64), - rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), - ); - - // SIMD shift left - for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] { - let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes)); - } - - // SIMD shift right (logical) - for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] { - let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes)); - } - - // SIMD shift right (arithmetic) - for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] { - let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes)); - } - - // SIMD immediate shift - for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] { - let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6)); - - let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2)); - - // One exception: PSRAQ does not exist in for 64x2 in SSE2, it requires a higher CPU feature set. 
- if *ty != I64 { - let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4)); - } - } - - // SIMD integer comparisons - { - use IntCC::*; - for (ty, cc, opcodes, isa_predicate) in &[ - (I8, Equal, &PCMPEQB[..], None), - (I16, Equal, &PCMPEQW[..], None), - (I32, Equal, &PCMPEQD[..], None), - (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)), - (I8, SignedGreaterThan, &PCMPGTB[..], None), - (I16, SignedGreaterThan, &PCMPGTW[..], None), - (I32, SignedGreaterThan, &PCMPGTD[..], None), - (I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)), - ] { - let instruction = icmp - .bind(Immediate::IntCC(*cc)) - .bind(vector(*ty, sse_vector_size)); - let template = rec_icscc_fpr.opcodes(opcodes); - e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate); - } - } - - // SIMD min/max - for (ty, inst, opcodes, isa_predicate) in &[ - (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)), - (I16, x86_pmaxs, &PMAXSW[..], None), - (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)), - (I8, x86_pmaxu, &PMAXUB[..], None), - (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)), - (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)), - (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)), - (I16, x86_pmins, &PMINSW[..], None), - (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)), - (I8, x86_pminu, &PMINUB[..], None), - (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)), - (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)), - ] { - let inst = inst.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate); - } - - // SIMD float comparisons - e.enc_both_inferred( - fcmp.bind(vector(F32, sse_vector_size)), - rec_pfcmp.opcodes(&CMPPS), - ); - e.enc_both_inferred( - fcmp.bind(vector(F64, sse_vector_size)), - rec_pfcmp.opcodes(&CMPPD), - ); - - // SIMD float arithmetic - for (ty, inst, opcodes) in &[ - (F32, fadd, &ADDPS[..]), - (F64, fadd, &ADDPD[..]), - 
(F32, fsub, &SUBPS[..]), - (F64, fsub, &SUBPD[..]), - (F32, fmul, &MULPS[..]), - (F64, fmul, &MULPD[..]), - (F32, fdiv, &DIVPS[..]), - (F64, fdiv, &DIVPD[..]), - (F32, x86_fmin, &MINPS[..]), - (F64, x86_fmin, &MINPD[..]), - (F32, x86_fmax, &MAXPS[..]), - (F64, x86_fmax, &MAXPD[..]), - ] { - let inst = inst.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(inst, rec_fa.opcodes(opcodes)); - } - for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] { - let inst = inst.bind(vector(*ty, sse_vector_size)); - e.enc_both_inferred(inst, rec_furm.opcodes(opcodes)); - } -} - -#[inline(never)] -fn define_entity_ref( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let const_addr = shared.by_name("const_addr"); - let func_addr = shared.by_name("func_addr"); - let stack_addr = shared.by_name("stack_addr"); - let symbol_value = shared.by_name("symbol_value"); - - // Shorthands for recipes. - let rec_allones_fnaddr4 = r.template("allones_fnaddr4"); - let rec_allones_fnaddr8 = r.template("allones_fnaddr8"); - let rec_fnaddr4 = r.template("fnaddr4"); - let rec_fnaddr8 = r.template("fnaddr8"); - let rec_const_addr = r.template("const_addr"); - let rec_got_fnaddr8 = r.template("got_fnaddr8"); - let rec_got_gvaddr8 = r.template("got_gvaddr8"); - let rec_gvaddr4 = r.template("gvaddr4"); - let rec_gvaddr8 = r.template("gvaddr8"); - let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8"); - let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8"); - let rec_spaddr_id = r.template("spaddr_id"); - - // Predicates shorthands. 
- let all_ones_funcaddrs_and_not_is_pic = - settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); - let is_pic = settings.predicate_by_name("is_pic"); - let not_all_ones_funcaddrs_and_not_is_pic = - settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); - let not_is_pic = settings.predicate_by_name("not_is_pic"); - - // Function addresses. - - // Non-PIC, all-ones funcaddresses. - e.enc32_isap( - func_addr.bind(I32), - rec_fnaddr4.opcodes(&MOV_IMM), - not_all_ones_funcaddrs_and_not_is_pic, - ); - e.enc64_isap( - func_addr.bind(I64), - rec_fnaddr8.opcodes(&MOV_IMM).rex().w(), - not_all_ones_funcaddrs_and_not_is_pic, - ); - - // Non-PIC, all-zeros funcaddresses. - e.enc32_isap( - func_addr.bind(I32), - rec_allones_fnaddr4.opcodes(&MOV_IMM), - all_ones_funcaddrs_and_not_is_pic, - ); - e.enc64_isap( - func_addr.bind(I64), - rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(), - all_ones_funcaddrs_and_not_is_pic, - ); - - // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field. - let is_colocated_func = - InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref"); - e.enc64_instp( - func_addr.bind(I64), - rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(), - is_colocated_func, - ); - - // 64-bit, non-colocated, PIC. - e.enc64_isap( - func_addr.bind(I64), - rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(), - is_pic, - ); - - // Global addresses. - - // Non-PIC. - e.enc32_isap( - symbol_value.bind(I32), - rec_gvaddr4.opcodes(&MOV_IMM), - not_is_pic, - ); - e.enc64_isap( - symbol_value.bind(I64), - rec_gvaddr8.opcodes(&MOV_IMM).rex().w(), - not_is_pic, - ); - - // PIC, colocated. - e.enc64_func( - symbol_value.bind(I64), - rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(), - |encoding| { - encoding - .isa_predicate(is_pic) - .inst_predicate(InstructionPredicate::new_is_colocated_data(formats)) - }, - ); - - // PIC, non-colocated. 
- e.enc64_isap( - symbol_value.bind(I64), - rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(), - is_pic, - ); - - // Stack addresses. - // - // TODO: Add encoding rules for stack_load and stack_store, so that they - // don't get legalized to stack_addr + load/store. - e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w()); - e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA)); - - // Constant addresses (PIC). - e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w()); - e.enc32(const_addr.bind(I32), rec_const_addr.opcodes(&LEA)); -} - -/// Control flow opcodes. -#[inline(never)] -fn define_control_flow( - e: &mut PerCpuModeEncodings, - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - r: &RecipeGroup, -) { - let shared = &shared_defs.instructions; - let formats = &shared_defs.formats; - - // Shorthands for instructions. - let brff = shared.by_name("brff"); - let brif = shared.by_name("brif"); - let brnz = shared.by_name("brnz"); - let brz = shared.by_name("brz"); - let call = shared.by_name("call"); - let call_indirect = shared.by_name("call_indirect"); - let debugtrap = shared.by_name("debugtrap"); - let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); - let jump = shared.by_name("jump"); - let jump_table_base = shared.by_name("jump_table_base"); - let jump_table_entry = shared.by_name("jump_table_entry"); - let return_ = shared.by_name("return"); - let trap = shared.by_name("trap"); - let trapff = shared.by_name("trapff"); - let trapif = shared.by_name("trapif"); - let resumable_trap = shared.by_name("resumable_trap"); - - // Shorthands for recipes. 
- let rec_brfb = r.template("brfb"); - let rec_brfd = r.template("brfd"); - let rec_brib = r.template("brib"); - let rec_brid = r.template("brid"); - let rec_call_id = r.template("call_id"); - let rec_call_plt_id = r.template("call_plt_id"); - let rec_call_r = r.template("call_r"); - let rec_debugtrap = r.recipe("debugtrap"); - let rec_indirect_jmp = r.template("indirect_jmp"); - let rec_jmpb = r.template("jmpb"); - let rec_jmpd = r.template("jmpd"); - let rec_jt_base = r.template("jt_base"); - let rec_jt_entry = r.template("jt_entry"); - let rec_ret = r.template("ret"); - let rec_t8jccb_abcd = r.template("t8jccb_abcd"); - let rec_t8jccd_abcd = r.template("t8jccd_abcd"); - let rec_t8jccd_long = r.template("t8jccd_long"); - let rec_tjccb = r.template("tjccb"); - let rec_tjccd = r.template("tjccd"); - let rec_trap = r.template("trap"); - let rec_trapif = r.recipe("trapif"); - let rec_trapff = r.recipe("trapff"); - - // Predicates shorthands. - let is_pic = settings.predicate_by_name("is_pic"); - - // Call/return - - // 32-bit, both PIC and non-PIC. - e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE)); - - // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. - let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref"); - e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func); - - // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC - // is currently using the large model, which requires calls be lowered to - // func_addr+call_indirect. 
- e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic); - - e.enc32( - call_indirect.bind(I32), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), - ); - e.enc64( - call_indirect.bind(I64), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(), - ); - e.enc64( - call_indirect.bind(I64), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), - ); - - e.enc32(return_, rec_ret.opcodes(&RET_NEAR)); - e.enc64(return_, rec_ret.opcodes(&RET_NEAR)); - - // Branches. - e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT)); - e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT)); - e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); - e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); - - e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW)); - e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW)); - - // Not all float condition codes are legal, see `supported_floatccs`. - e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW)); - e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW)); - - // Note that the tjccd opcode will be prefixed with 0x0f. - e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL)); - e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG)); - e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL)); - e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG)); - - // Branch on a b1 value in a register only looks at the low 8 bits. See also - // bint encodings below. - // - // Start with the worst-case encoding for X86_32 only. The register allocator - // can't handle a branch with an ABCD-constrained operand. 
- e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG)); - e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG)); - - e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL)); - e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG)); - e.enc_both( - brnz.bind(B1), - rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL), - ); - e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG)); - - // Jump tables. - e.enc64( - jump_table_entry.bind(I64), - rec_jt_entry.opcodes(&MOVSXD).rex().w(), - ); - e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD)); - - e.enc64( - jump_table_base.bind(I64), - rec_jt_base.opcodes(&LEA).rex().w(), - ); - e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA)); - - e.enc_x86_64( - indirect_jump_table_br.bind(I64), - rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), - ); - e.enc32( - indirect_jump_table_br.bind(I32), - rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), - ); - - // Trap as ud2 - e.enc32(trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc64(trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); - - // Debug trap as int3 - e.enc32_rec(debugtrap, rec_debugtrap, 0); - e.enc64_rec(debugtrap, rec_debugtrap, 0); - - e.enc32_rec(trapif, rec_trapif, 0); - e.enc64_rec(trapif, rec_trapif, 0); - e.enc32_rec(trapff, rec_trapff, 0); - e.enc64_rec(trapff, rec_trapff, 0); -} - -/// Reference type instructions. 
-#[inline(never)] -fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { - let shared = &shared_defs.instructions; - - let is_null = shared.by_name("is_null"); - let is_invalid = shared.by_name("is_invalid"); - let null = shared.by_name("null"); - let safepoint = shared.by_name("safepoint"); - - let rec_is_zero = r.template("is_zero"); - let rec_is_invalid = r.template("is_invalid"); - let rec_pu_id_ref = r.template("pu_id_ref"); - let rec_safepoint = r.recipe("safepoint"); - - // Null references implemented as iconst 0. - e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM)); - - e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM)); - e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM)); - - // is_null, implemented by testing whether the value is 0. - e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG)); - - // is_invalid, implemented by testing whether the value is -1. - e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7)); - - // safepoint instruction calls sink, no actual encoding. - e.enc32_rec(safepoint, rec_safepoint, 0); - e.enc64_rec(safepoint, rec_safepoint, 0); -} - -#[allow(clippy::cognitive_complexity)] -pub(crate) fn define( - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) -> PerCpuModeEncodings { - // Definitions. 
- let mut e = PerCpuModeEncodings::new(); - - define_moves(&mut e, shared_defs, r); - define_memory(&mut e, shared_defs, x86, r); - define_fpu_moves(&mut e, shared_defs, r); - define_fpu_memory(&mut e, shared_defs, r); - define_fpu_ops(&mut e, shared_defs, settings, x86, r); - define_alu(&mut e, shared_defs, settings, x86, r); - define_simd(&mut e, shared_defs, settings, x86, r); - define_entity_ref(&mut e, shared_defs, settings, r); - define_control_flow(&mut e, shared_defs, settings, r); - define_reftypes(&mut e, shared_defs, r); - - let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr"); - let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr"); - - let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr"); - let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr"); - - e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0); - e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0); - - e -} diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs deleted file mode 100644 index 7acd2e2c50..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ /dev/null @@ -1,723 +0,0 @@ -#![allow(non_snake_case)] - -use crate::cdsl::instructions::{ - AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, -}; -use crate::cdsl::operands::Operand; -use crate::cdsl::types::ValueType; -use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; -use crate::shared::entities::EntityRefs; -use crate::shared::formats::Formats; -use crate::shared::immediates::Immediates; -use crate::shared::types; - -#[allow(clippy::many_single_char_names)] -pub(crate) fn define( - mut all_instructions: &mut AllInstructions, - formats: &Formats, - immediates: &Immediates, - entities: &EntityRefs, -) -> InstructionGroup { - let mut ig = InstructionGroupBuilder::new(&mut all_instructions); - - let iflags: &TypeVar = 
&ValueType::Special(types::Flag::IFlags.into()).into(); - - let iWord = &TypeVar::new( - "iWord", - "A scalar integer machine word", - TypeSetBuilder::new().ints(32..64).build(), - ); - let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator"); - let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator"); - let d = &Operand::new("d", iWord).with_doc("Denominator"); - let q = &Operand::new("q", iWord).with_doc("Quotient"); - let r = &Operand::new("r", iWord).with_doc("Remainder"); - - ig.push( - Inst::new( - "x86_udivmodx", - r#" - Extended unsigned division. - - Concatenate the bits in `nhi` and `nlo` to form the numerator. - Interpret the bits as an unsigned number and divide by the unsigned - denominator `d`. Trap when `d` is zero or if the quotient is larger - than the range of the output. - - Return both quotient and remainder. - "#, - &formats.ternary, - ) - .operands_in(vec![nlo, nhi, d]) - .operands_out(vec![q, r]) - .can_trap(true), - ); - - ig.push( - Inst::new( - "x86_sdivmodx", - r#" - Extended signed division. - - Concatenate the bits in `nhi` and `nlo` to form the numerator. - Interpret the bits as a signed number and divide by the signed - denominator `d`. Trap when `d` is zero or if the quotient is outside - the range of the output. - - Return both quotient and remainder. - "#, - &formats.ternary, - ) - .operands_in(vec![nlo, nhi, d]) - .operands_out(vec![q, r]) - .can_trap(true), - ); - - let argL = &Operand::new("argL", iWord); - let argR = &Operand::new("argR", iWord); - let resLo = &Operand::new("resLo", iWord); - let resHi = &Operand::new("resHi", iWord); - - ig.push( - Inst::new( - "x86_umulx", - r#" - Unsigned integer multiplication, producing a double-length result. - - Polymorphic over all scalar integer types, but does not support vector - types. 
- "#, - &formats.binary, - ) - .operands_in(vec![argL, argR]) - .operands_out(vec![resLo, resHi]), - ); - - ig.push( - Inst::new( - "x86_smulx", - r#" - Signed integer multiplication, producing a double-length result. - - Polymorphic over all scalar integer types, but does not support vector - types. - "#, - &formats.binary, - ) - .operands_in(vec![argL, argR]) - .operands_out(vec![resLo, resHi]), - ); - - let Float = &TypeVar::new( - "Float", - "A scalar or vector floating point number", - TypeSetBuilder::new() - .floats(Interval::All) - .simd_lanes(Interval::All) - .build(), - ); - let IntTo = &TypeVar::new( - "IntTo", - "An integer type with the same number of lanes", - TypeSetBuilder::new() - .ints(32..64) - .simd_lanes(Interval::All) - .build(), - ); - let x = &Operand::new("x", Float); - let a = &Operand::new("a", IntTo); - - ig.push( - Inst::new( - "x86_cvtt2si", - r#" - Convert with truncation floating point to signed integer. - - The source floating point operand is converted to a signed integer by - rounding towards zero. If the result can't be represented in the output - type, returns the smallest signed value the output type can represent. - - This instruction does not trap. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![a]), - ); - - let f32x4 = &TypeVar::new( - "f32x4", - "A floating point number", - TypeSetBuilder::new() - .floats(32..32) - .simd_lanes(4..4) - .build(), - ); - let i32x4 = &TypeVar::new( - "i32x4", - "An integer type with the same number of lanes", - TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(), - ); - let x = &Operand::new("x", i32x4); - let a = &Operand::new("a", f32x4); - - ig.push( - Inst::new( - "x86_vcvtudq2ps", - r#" - Convert unsigned integer to floating point. - - Convert packed doubleword unsigned integers to packed single-precision floating-point - values. This instruction does not trap. 
- "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", Float); - let a = &Operand::new("a", Float); - let y = &Operand::new("y", Float); - - ig.push( - Inst::new( - "x86_fmin", - r#" - Floating point minimum with x86 semantics. - - This is equivalent to the C ternary operator `x < y ? x : y` which - differs from `fmin` when either operand is NaN or when comparing - +0.0 to -0.0. - - When the two operands don't compare as LT, `y` is returned unchanged, - even if it is a signalling NaN. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_fmax", - r#" - Floating point maximum with x86 semantics. - - This is equivalent to the C ternary operator `x > y ? x : y` which - differs from `fmax` when either operand is NaN or when comparing - +0.0 to -0.0. - - When the two operands don't compare as GT, `y` is returned unchanged, - even if it is a signalling NaN. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", iWord); - - ig.push( - Inst::new( - "x86_push", - r#" - Pushes a value onto the stack. - - Decrements the stack pointer and stores the specified value on to the top. - - This is polymorphic in i32 and i64. However, it is only implemented for i64 - in 64-bit mode, and only for i32 in 32-bit mode. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .other_side_effects(true) - .can_store(true), - ); - - ig.push( - Inst::new( - "x86_pop", - r#" - Pops a value from the stack. - - Loads a value from the top of the stack and then increments the stack - pointer. - - This is polymorphic in i32 and i64. However, it is only implemented for i64 - in 64-bit mode, and only for i32 in 32-bit mode. 
- "#, - &formats.nullary, - ) - .operands_out(vec![x]) - .other_side_effects(true) - .can_load(true), - ); - - let y = &Operand::new("y", iWord); - let rflags = &Operand::new("rflags", iflags); - - ig.push( - Inst::new( - "x86_bsr", - r#" - Bit Scan Reverse -- returns the bit-index of the most significant 1 - in the word. Result is undefined if the argument is zero. However, it - sets the Z flag depending on the argument, so it is at least easy to - detect and handle that case. - - This is polymorphic in i32 and i64. It is implemented for both i64 and - i32 in 64-bit mode, and only for i32 in 32-bit mode. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![y, rflags]), - ); - - ig.push( - Inst::new( - "x86_bsf", - r#" - Bit Scan Forwards -- returns the bit-index of the least significant 1 - in the word. Is otherwise identical to 'bsr', just above. - "#, - &formats.unary, - ) - .operands_in(vec![x]) - .operands_out(vec![y, rflags]), - ); - - let uimm8 = &immediates.uimm8; - let TxN = &TypeVar::new( - "TxN", - "A SIMD vector type", - TypeSetBuilder::new() - .ints(Interval::All) - .floats(Interval::All) - .bools(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)"); - let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)"); - let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details"); - - ig.push( - Inst::new( - "x86_pshufd", - r#" - Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended - register and re-orders the data according to the passed immediate byte. 
- "#, - &formats.binary_imm8, - ) - .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pshufb", - r#" - Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle - mask from either memory or another extended register - "#, - &formats.binary, - ) - .operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN) - .operands_out(vec![a]), - ); - - let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b"); - ig.push( - Inst::new( - "x86_pblendw", - r#" - Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a - lane in ``b``: if the bit is set, the lane is copied into ``a``. - "#, - &formats.ternary_imm8, - ) - .operands_in(vec![a, b, mask]) - .operands_out(vec![a]), - ); - - let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index"); - let x = &Operand::new("x", TxN); - let a = &Operand::new("a", &TxN.lane_of()); - - ig.push( - Inst::new( - "x86_pextr", - r#" - Extract lane ``Idx`` from ``x``. - The lane index, ``Idx``, is an immediate value, not an SSA value. It - must indicate a valid lane index for the type of ``x``. - "#, - &formats.binary_imm8, - ) - .operands_in(vec![x, Idx]) - .operands_out(vec![a]), - ); - - let IBxN = &TypeVar::new( - "IBxN", - "A SIMD vector type containing only booleans and integers", - TypeSetBuilder::new() - .ints(Interval::All) - .bools(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let x = &Operand::new("x", IBxN); - let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value"); - let a = &Operand::new("a", IBxN); - - ig.push( - Inst::new( - "x86_pinsr", - r#" - Insert ``y`` into ``x`` at lane ``Idx``. - The lane index, ``Idx``, is an immediate value, not an SSA value. It - must indicate a valid lane index for the type of ``x``. 
- "#, - &formats.ternary_imm8, - ) - .operands_in(vec![x, y, Idx]) - .operands_out(vec![a]), - ); - - let FxN = &TypeVar::new( - "FxN", - "A SIMD vector type containing floats", - TypeSetBuilder::new() - .floats(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let x = &Operand::new("x", FxN); - let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value"); - let a = &Operand::new("a", FxN); - - ig.push( - Inst::new( - "x86_insertps", - r#" - Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is - extracted from and which it is inserted to. This is similar to x86_pinsr but inserts - floats, which are already stored in an XMM register. - "#, - &formats.ternary_imm8, - ) - .operands_in(vec![x, y, Idx]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", TxN); - let y = &Operand::new("y", TxN); - let a = &Operand::new("a", TxN); - - ig.push( - Inst::new( - "x86_punpckh", - r#" - Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional - i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation - would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane - ordering). - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_punpckl", - r#" - Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional - i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation - would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane - ordering). 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", FxN); - let y = &Operand::new("y", FxN); - let a = &Operand::new("a", FxN); - - ig.push( - Inst::new( - "x86_movsd", - r#" - Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x`` - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_movlhps", - r#" - Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x`` - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let IxN = &TypeVar::new( - "IxN", - "A SIMD vector type containing integers", - TypeSetBuilder::new() - .ints(Interval::All) - .simd_lanes(Interval::All) - .includes_scalars(false) - .build(), - ); - let I128 = &TypeVar::new( - "I128", - "A SIMD vector type containing one large integer (due to Cranelift type constraints, \ - this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \ - upper lane is concatenated with the lower lane to form the integer)", - TypeSetBuilder::new() - .ints(64..64) - .simd_lanes(2..2) - .includes_scalars(false) - .build(), - ); - - let x = &Operand::new("x", IxN).with_doc("Vector value to shift"); - let y = &Operand::new("y", I128).with_doc("Number of bits to shift"); - let a = &Operand::new("a", IxN); - - ig.push( - Inst::new( - "x86_psll", - r#" - Shift Packed Data Left Logical -- This implements the behavior of the shared instruction - ``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL* - family of instructions. 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_psrl", - r#" - Shift Packed Data Right Logical -- This implements the behavior of the shared instruction - ``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL* - family of instructions. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_psra", - r#" - Shift Packed Data Right Arithmetic -- This implements the behavior of the shared - instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by - the PSRA* family of instructions. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let I64x2 = &TypeVar::new( - "I64x2", - "A SIMD vector type containing two 64-bit integers", - TypeSetBuilder::new() - .ints(64..64) - .simd_lanes(2..2) - .includes_scalars(false) - .build(), - ); - - let x = &Operand::new("x", I64x2); - let y = &Operand::new("y", I64x2); - let a = &Operand::new("a", I64x2); - ig.push( - Inst::new( - "x86_pmullq", - r#" - Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with - lane-wise wrapping if the result overflows. This instruction is necessary to add distinct - encodings for CPUs with newer vector features. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pmuludq", - r#" - Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2 - unsigned integers and receive a 64x2 result. This instruction avoids the need for handling - overflow as in `x86_pmullq`. 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let x = &Operand::new("x", TxN); - let y = &Operand::new("y", TxN); - let f = &Operand::new("f", iflags); - ig.push( - Inst::new( - "x86_ptest", - r#" - Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the - bitwise AND of the first source operand (first operand) and the second source operand - (second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise - AND of the second source operand (second operand) and the logical NOT of the destination - operand (first operand). - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![f]), - ); - - let x = &Operand::new("x", IxN); - let y = &Operand::new("y", IxN); - let a = &Operand::new("a", IxN); - ig.push( - Inst::new( - "x86_pmaxs", - r#" - Maximum of Packed Signed Integers -- Compare signed integers in the first and second - operand and return the maximum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pmaxu", - r#" - Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second - operand and return the maximum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pmins", - r#" - Minimum of Packed Signed Integers -- Compare signed integers in the first and second - operand and return the minimum values. - "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - ig.push( - Inst::new( - "x86_pminu", - r#" - Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second - operand and return the minimum values. 
- "#, - &formats.binary, - ) - .operands_in(vec![x, y]) - .operands_out(vec![a]), - ); - - let c = &Operand::new("c", uimm8) - .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details"); - ig.push( - Inst::new( - "x86_palignr", - r#" - Concatenate destination and source operands, extracting a byte-aligned result shifted to - the right by `c`. - "#, - &formats.ternary_imm8, - ) - .operands_in(vec![x, y, c]) - .operands_out(vec![a]), - ); - - let i64_t = &TypeVar::new( - "i64_t", - "A scalar 64bit integer", - TypeSetBuilder::new().ints(64..64).build(), - ); - - let GV = &Operand::new("GV", &entities.global_value); - let addr = &Operand::new("addr", i64_t); - - ig.push( - Inst::new( - "x86_elf_tls_get_addr", - r#" - Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should - not be used. - "#, - &formats.unary_global_value, - ) - // This is a bit overly broad to mark as clobbering *all* the registers, because it should - // only preserve caller-saved registers. There's no way to indicate this to register - // allocation yet, though, so mark as clobbering all registers instead. - .clobbers_all_regs(true) - .operands_in(vec![GV]) - .operands_out(vec![addr]), - ); - ig.push( - Inst::new( - "x86_macho_tls_get_addr", - r#" - Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should - not be used. - "#, - &formats.unary_global_value, - ) - // See above comment for x86_elf_tls_get_addr. 
- .clobbers_all_regs(true) - .operands_in(vec![GV]) - .operands_out(vec![addr]), - ); - - ig.build() -} diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs deleted file mode 100644 index de78c3b3b7..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ /dev/null @@ -1,827 +0,0 @@ -use crate::cdsl::ast::{constant, var, ExprBuilder, Literal}; -use crate::cdsl::instructions::{vector, Bindable, InstructionGroup}; -use crate::cdsl::types::{LaneType, ValueType}; -use crate::cdsl::xform::TransformGroupBuilder; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::Definitions as SharedDefinitions; - -#[allow(clippy::many_single_char_names)] -pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) { - let mut expand = TransformGroupBuilder::new( - "x86_expand", - r#" - Legalize instructions by expansion. - - Use x86-specific instructions if needed."#, - ) - .isa("x86") - .chain_with(shared.transform_groups.by_name("expand_flags").id); - - let mut narrow = TransformGroupBuilder::new( - "x86_narrow", - r#" - Legalize instructions by narrowing. - - Use x86-specific instructions if needed."#, - ) - .isa("x86") - .chain_with(shared.transform_groups.by_name("narrow_flags").id); - - let mut narrow_avx = TransformGroupBuilder::new( - "x86_narrow_avx", - r#" - Legalize instructions by narrowing with CPU feature checks. - - This special case converts using x86 AVX instructions where available."#, - ) - .isa("x86"); - // We cannot chain with the x86_narrow group until this group is built, see bottom of this - // function for where this is chained. - - let mut widen = TransformGroupBuilder::new( - "x86_widen", - r#" - Legalize instructions by widening. - - Use x86-specific instructions if needed."#, - ) - .isa("x86") - .chain_with(shared.transform_groups.by_name("widen").id); - - // List of instructions. 
- let insts = &shared.instructions; - let band = insts.by_name("band"); - let bor = insts.by_name("bor"); - let clz = insts.by_name("clz"); - let ctz = insts.by_name("ctz"); - let fcmp = insts.by_name("fcmp"); - let fcvt_from_uint = insts.by_name("fcvt_from_uint"); - let fcvt_to_sint = insts.by_name("fcvt_to_sint"); - let fcvt_to_uint = insts.by_name("fcvt_to_uint"); - let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); - let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); - let fmax = insts.by_name("fmax"); - let fmin = insts.by_name("fmin"); - let iadd = insts.by_name("iadd"); - let iconst = insts.by_name("iconst"); - let imul = insts.by_name("imul"); - let ineg = insts.by_name("ineg"); - let isub = insts.by_name("isub"); - let ishl = insts.by_name("ishl"); - let ireduce = insts.by_name("ireduce"); - let popcnt = insts.by_name("popcnt"); - let sdiv = insts.by_name("sdiv"); - let selectif = insts.by_name("selectif"); - let smulhi = insts.by_name("smulhi"); - let srem = insts.by_name("srem"); - let tls_value = insts.by_name("tls_value"); - let udiv = insts.by_name("udiv"); - let umulhi = insts.by_name("umulhi"); - let ushr = insts.by_name("ushr"); - let ushr_imm = insts.by_name("ushr_imm"); - let urem = insts.by_name("urem"); - - let x86_bsf = x86_instructions.by_name("x86_bsf"); - let x86_bsr = x86_instructions.by_name("x86_bsr"); - let x86_umulx = x86_instructions.by_name("x86_umulx"); - let x86_smulx = x86_instructions.by_name("x86_smulx"); - - let imm = &shared.imm; - - // Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce - // the size of the shift amount. This is useful for x86_32, where an I64 shift amount is - // not encodable. 
- let a = var("a"); - let x = var("x"); - let y = var("y"); - let z = var("z"); - - for &ty in &[I8, I16, I32] { - let ishl_by_i64 = ishl.bind(ty).bind(I64); - let ireduce = ireduce.bind(I32); - expand.legalize( - def!(a = ishl_by_i64(x, y)), - vec![def!(z = ireduce(y)), def!(a = ishl(x, z))], - ); - } - - for &ty in &[I8, I16, I32] { - let ushr_by_i64 = ushr.bind(ty).bind(I64); - let ireduce = ireduce.bind(I32); - expand.legalize( - def!(a = ushr_by_i64(x, y)), - vec![def!(z = ireduce(y)), def!(a = ishl(x, z))], - ); - } - - // Division and remainder. - // - // The srem expansion requires custom code because srem INT_MIN, -1 is not - // allowed to trap. The other ops need to check avoid_div_traps. - expand.custom_legalize(sdiv, "expand_sdivrem"); - expand.custom_legalize(srem, "expand_sdivrem"); - expand.custom_legalize(udiv, "expand_udivrem"); - expand.custom_legalize(urem, "expand_udivrem"); - - // Double length (widening) multiplication. - let a = var("a"); - let x = var("x"); - let y = var("y"); - let a1 = var("a1"); - let a2 = var("a2"); - let res_lo = var("res_lo"); - let res_hi = var("res_hi"); - - expand.legalize( - def!(res_hi = umulhi(x, y)), - vec![def!((res_lo, res_hi) = x86_umulx(x, y))], - ); - - expand.legalize( - def!(res_hi = smulhi(x, y)), - vec![def!((res_lo, res_hi) = x86_smulx(x, y))], - ); - - // Floating point condition codes. - // - // The 8 condition codes in `supported_floatccs` are directly supported by a - // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization - // patterns. 
- - let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq"); - let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord"); - let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq"); - let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne"); - let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno"); - let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one"); - - // Equality needs an explicit `ord` test which checks the parity bit. - expand.legalize( - def!(a = fcmp(floatcc_eq, x, y)), - vec![ - def!(a1 = fcmp(floatcc_ord, x, y)), - def!(a2 = fcmp(floatcc_ueq, x, y)), - def!(a = band(a1, a2)), - ], - ); - expand.legalize( - def!(a = fcmp(floatcc_ne, x, y)), - vec![ - def!(a1 = fcmp(floatcc_uno, x, y)), - def!(a2 = fcmp(floatcc_one, x, y)), - def!(a = bor(a1, a2)), - ], - ); - - let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt"); - let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt"); - let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le"); - let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge"); - let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt"); - let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult"); - let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge"); - let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule"); - - // Inequalities that need to be reversed. - for &(cc, rev_cc) in &[ - (floatcc_lt, floatcc_gt), - (floatcc_le, floatcc_ge), - (floatcc_ugt, floatcc_ult), - (floatcc_uge, floatcc_ule), - ] { - expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]); - } - - // We need to modify the CFG for min/max legalization. - expand.custom_legalize(fmin, "expand_minmax"); - expand.custom_legalize(fmax, "expand_minmax"); - - // Conversions from unsigned need special handling. - expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint"); - // Conversions from float to int can trap and modify the control flow graph. 
- expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint"); - expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint"); - expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat"); - expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat"); - - // Count leading and trailing zeroes, for baseline x86_64 - let c_minus_one = var("c_minus_one"); - let c_thirty_one = var("c_thirty_one"); - let c_thirty_two = var("c_thirty_two"); - let c_sixty_three = var("c_sixty_three"); - let c_sixty_four = var("c_sixty_four"); - let index1 = var("index1"); - let r2flags = var("r2flags"); - let index2 = var("index2"); - - let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); - let imm64_minus_one = Literal::constant(&imm.imm64, -1); - let imm64_63 = Literal::constant(&imm.imm64, 63); - expand.legalize( - def!(a = clz.I64(x)), - vec![ - def!(c_minus_one = iconst(imm64_minus_one)), - def!(c_sixty_three = iconst(imm64_63)), - def!((index1, r2flags) = x86_bsr(x)), - def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), - def!(a = isub(c_sixty_three, index2)), - ], - ); - - let imm64_31 = Literal::constant(&imm.imm64, 31); - expand.legalize( - def!(a = clz.I32(x)), - vec![ - def!(c_minus_one = iconst(imm64_minus_one)), - def!(c_thirty_one = iconst(imm64_31)), - def!((index1, r2flags) = x86_bsr(x)), - def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), - def!(a = isub(c_thirty_one, index2)), - ], - ); - - let imm64_64 = Literal::constant(&imm.imm64, 64); - expand.legalize( - def!(a = ctz.I64(x)), - vec![ - def!(c_sixty_four = iconst(imm64_64)), - def!((index1, r2flags) = x86_bsf(x)), - def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)), - ], - ); - - let imm64_32 = Literal::constant(&imm.imm64, 32); - expand.legalize( - def!(a = ctz.I32(x)), - vec![ - def!(c_thirty_two = iconst(imm64_32)), - def!((index1, r2flags) = x86_bsf(x)), - def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)), - ], - ); - - // Population count for 
baseline x86_64 - let x = var("x"); - let r = var("r"); - - let qv3 = var("qv3"); - let qv4 = var("qv4"); - let qv5 = var("qv5"); - let qv6 = var("qv6"); - let qv7 = var("qv7"); - let qv8 = var("qv8"); - let qv9 = var("qv9"); - let qv10 = var("qv10"); - let qv11 = var("qv11"); - let qv12 = var("qv12"); - let qv13 = var("qv13"); - let qv14 = var("qv14"); - let qv15 = var("qv15"); - let qc77 = var("qc77"); - #[allow(non_snake_case)] - let qc0F = var("qc0F"); - let qc01 = var("qc01"); - - let imm64_1 = Literal::constant(&imm.imm64, 1); - let imm64_4 = Literal::constant(&imm.imm64, 4); - expand.legalize( - def!(r = popcnt.I64(x)), - vec![ - def!(qv3 = ushr_imm(x, imm64_1)), - def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))), - def!(qv4 = band(qv3, qc77)), - def!(qv5 = isub(x, qv4)), - def!(qv6 = ushr_imm(qv4, imm64_1)), - def!(qv7 = band(qv6, qc77)), - def!(qv8 = isub(qv5, qv7)), - def!(qv9 = ushr_imm(qv7, imm64_1)), - def!(qv10 = band(qv9, qc77)), - def!(qv11 = isub(qv8, qv10)), - def!(qv12 = ushr_imm(qv11, imm64_4)), - def!(qv13 = iadd(qv11, qv12)), - def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))), - def!(qv14 = band(qv13, qc0F)), - def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))), - def!(qv15 = imul(qv14, qc01)), - def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))), - ], - ); - - let lv3 = var("lv3"); - let lv4 = var("lv4"); - let lv5 = var("lv5"); - let lv6 = var("lv6"); - let lv7 = var("lv7"); - let lv8 = var("lv8"); - let lv9 = var("lv9"); - let lv10 = var("lv10"); - let lv11 = var("lv11"); - let lv12 = var("lv12"); - let lv13 = var("lv13"); - let lv14 = var("lv14"); - let lv15 = var("lv15"); - let lc77 = var("lc77"); - #[allow(non_snake_case)] - let lc0F = var("lc0F"); - let lc01 = var("lc01"); - - expand.legalize( - def!(r = popcnt.I32(x)), - vec![ - def!(lv3 = ushr_imm(x, imm64_1)), - def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))), - def!(lv4 = band(lv3, 
lc77)), - def!(lv5 = isub(x, lv4)), - def!(lv6 = ushr_imm(lv4, imm64_1)), - def!(lv7 = band(lv6, lc77)), - def!(lv8 = isub(lv5, lv7)), - def!(lv9 = ushr_imm(lv7, imm64_1)), - def!(lv10 = band(lv9, lc77)), - def!(lv11 = isub(lv8, lv10)), - def!(lv12 = ushr_imm(lv11, imm64_4)), - def!(lv13 = iadd(lv11, lv12)), - def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))), - def!(lv14 = band(lv13, lc0F)), - def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))), - def!(lv15 = imul(lv14, lc01)), - def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))), - ], - ); - - expand.custom_legalize(ineg, "convert_ineg"); - expand.custom_legalize(tls_value, "expand_tls_value"); - widen.custom_legalize(ineg, "convert_ineg"); - - // To reduce compilation times, separate out large blocks of legalizations by theme. - define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx); - - expand.build_and_add_to(&mut shared.transform_groups); - let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups); - narrow_avx - .chain_with(narrow_id) - .build_and_add_to(&mut shared.transform_groups); - widen.build_and_add_to(&mut shared.transform_groups); -} - -fn define_simd( - shared: &mut SharedDefinitions, - x86_instructions: &InstructionGroup, - narrow: &mut TransformGroupBuilder, - narrow_avx: &mut TransformGroupBuilder, -) { - let insts = &shared.instructions; - let band = insts.by_name("band"); - let band_not = insts.by_name("band_not"); - let bitcast = insts.by_name("bitcast"); - let bitselect = insts.by_name("bitselect"); - let bor = insts.by_name("bor"); - let bnot = insts.by_name("bnot"); - let bxor = insts.by_name("bxor"); - let extractlane = insts.by_name("extractlane"); - let fabs = insts.by_name("fabs"); - let fcmp = insts.by_name("fcmp"); - let fcvt_from_uint = insts.by_name("fcvt_from_uint"); - let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); - let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); - let fmax = insts.by_name("fmax"); - 
let fmin = insts.by_name("fmin"); - let fneg = insts.by_name("fneg"); - let iadd_imm = insts.by_name("iadd_imm"); - let icmp = insts.by_name("icmp"); - let imax = insts.by_name("imax"); - let imin = insts.by_name("imin"); - let imul = insts.by_name("imul"); - let ineg = insts.by_name("ineg"); - let insertlane = insts.by_name("insertlane"); - let ishl = insts.by_name("ishl"); - let ishl_imm = insts.by_name("ishl_imm"); - let raw_bitcast = insts.by_name("raw_bitcast"); - let scalar_to_vector = insts.by_name("scalar_to_vector"); - let splat = insts.by_name("splat"); - let shuffle = insts.by_name("shuffle"); - let sshr = insts.by_name("sshr"); - let swizzle = insts.by_name("swizzle"); - let trueif = insts.by_name("trueif"); - let uadd_sat = insts.by_name("uadd_sat"); - let umax = insts.by_name("umax"); - let umin = insts.by_name("umin"); - let snarrow = insts.by_name("snarrow"); - let swiden_high = insts.by_name("swiden_high"); - let swiden_low = insts.by_name("swiden_low"); - let ushr_imm = insts.by_name("ushr_imm"); - let ushr = insts.by_name("ushr"); - let uwiden_high = insts.by_name("uwiden_high"); - let uwiden_low = insts.by_name("uwiden_low"); - let vconst = insts.by_name("vconst"); - let vall_true = insts.by_name("vall_true"); - let vany_true = insts.by_name("vany_true"); - let vselect = insts.by_name("vselect"); - - let x86_palignr = x86_instructions.by_name("x86_palignr"); - let x86_pmaxs = x86_instructions.by_name("x86_pmaxs"); - let x86_pmaxu = x86_instructions.by_name("x86_pmaxu"); - let x86_pmins = x86_instructions.by_name("x86_pmins"); - let x86_pminu = x86_instructions.by_name("x86_pminu"); - let x86_pshufb = x86_instructions.by_name("x86_pshufb"); - let x86_pshufd = x86_instructions.by_name("x86_pshufd"); - let x86_psra = x86_instructions.by_name("x86_psra"); - let x86_ptest = x86_instructions.by_name("x86_ptest"); - let x86_punpckh = x86_instructions.by_name("x86_punpckh"); - let x86_punpckl = x86_instructions.by_name("x86_punpckl"); - - let imm = 
&shared.imm; - - // Set up variables and immediates. - let uimm8_zero = Literal::constant(&imm.uimm8, 0x00); - let uimm8_one = Literal::constant(&imm.uimm8, 0x01); - let uimm8_eight = Literal::constant(&imm.uimm8, 8); - let u128_zeroes = constant(vec![0x00; 16]); - let u128_ones = constant(vec![0xff; 16]); - let u128_seventies = constant(vec![0x70; 16]); - let a = var("a"); - let b = var("b"); - let c = var("c"); - let d = var("d"); - let e = var("e"); - let f = var("f"); - let g = var("g"); - let h = var("h"); - let x = var("x"); - let y = var("y"); - let z = var("z"); - - // Limit the SIMD vector size: eventually multiple vector sizes may be supported - // but for now only SSE-sized vectors are available. - let sse_vector_size: u64 = 128; - let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; - - // SIMD splat: 8-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { - let splat_any8x16 = splat.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = splat_any8x16(x)), - vec![ - // Move into the lowest 8 bits of an XMM register. - def!(a = scalar_to_vector(x)), - // Zero out a different XMM register; the shuffle mask for moving the lowest byte - // to all other byte lanes is 0x0. - def!(b = vconst(u128_zeroes)), - // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b). - def!(y = x86_pshufb(a, b)), - ], - ); - } - - // SIMD splat: 16-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { - let splat_x16x8 = splat.bind(vector(ty, sse_vector_size)); - let raw_bitcast_any16x8_to_i32x4 = raw_bitcast - .bind(vector(I32, sse_vector_size)) - .bind(vector(ty, sse_vector_size)); - let raw_bitcast_i32x4_to_any16x8 = raw_bitcast - .bind(vector(ty, sse_vector_size)) - .bind(vector(I32, sse_vector_size)); - narrow.legalize( - def!(y = splat_x16x8(x)), - vec![ - // Move into the lowest 16 bits of an XMM register. 
- def!(a = scalar_to_vector(x)), - // Insert the value again but in the next lowest 16 bits. - def!(b = insertlane(a, x, uimm8_one)), - // No instruction emitted; pretend this is an I32x4 so we can use PSHUFD. - def!(c = raw_bitcast_any16x8_to_i32x4(b)), - // Broadcast the bytes in the XMM register with PSHUFD. - def!(d = x86_pshufd(c, uimm8_zero)), - // No instruction emitted; pretend this is an X16x8 again. - def!(y = raw_bitcast_i32x4_to_any16x8(d)), - ], - ); - } - - // SIMD splat: 32-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { - let splat_any32x4 = splat.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = splat_any32x4(x)), - vec![ - // Translate to an x86 MOV to get the value in an XMM register. - def!(a = scalar_to_vector(x)), - // Broadcast the bytes in the XMM register with PSHUFD. - def!(y = x86_pshufd(a, uimm8_zero)), - ], - ); - } - - // SIMD splat: 64-bits - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) { - let splat_any64x2 = splat.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = splat_any64x2(x)), - vec![ - // Move into the lowest 64 bits of an XMM register. - def!(a = scalar_to_vector(x)), - // Move into the highest 64 bits of the same XMM register. - def!(y = insertlane(a, x, uimm8_one)), - ], - ); - } - - // SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring - // mask indexes greater than 15 to have the same semantics as a 0 index. For the spec discussion, - // see https://github.com/WebAssembly/simd/issues/93. 
- { - let swizzle = swizzle.bind(vector(I8, sse_vector_size)); - narrow.legalize( - def!(a = swizzle(x, y)), - vec![ - def!(b = vconst(u128_seventies)), - def!(c = uadd_sat(y, b)), - def!(a = x86_pshufb(x, c)), - ], - ); - } - - // SIMD bnot - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let bnot = bnot.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = bnot(x)), - vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))], - ); - } - - // SIMD shift right (arithmetic, i16x8 and i32x4) - for ty in &[I16, I32] { - let sshr = sshr.bind(vector(*ty, sse_vector_size)); - let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); - narrow.legalize( - def!(a = sshr(x, y)), - vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))], - ); - } - // SIMD shift right (arithmetic, i8x16) - { - let sshr = sshr.bind(vector(I8, sse_vector_size)); - let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); - let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size)); - let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size)); - narrow.legalize( - def!(z = sshr(x, y)), - vec![ - // Since we will use the high byte of each 16x8 lane, shift an extra 8 bits. - def!(a = iadd_imm(y, uimm8_eight)), - def!(b = bitcast_i64x2(a)), - // Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right. - def!(c = x86_punpckl(x, x)), - def!(d = raw_bitcast_i16x8(c)), - def!(e = x86_psra(d, b)), - // Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right. - def!(f = x86_punpckh(x, x)), - def!(g = raw_bitcast_i16x8_again(f)), - def!(h = x86_psra(g, b)), - // Re-pack the vector. 
- def!(z = snarrow(e, h)), - ], - ); - } - // SIMD shift right (arithmetic, i64x2) - { - let sshr_vector = sshr.bind(vector(I64, sse_vector_size)); - let sshr_scalar_lane0 = sshr.bind(I64); - let sshr_scalar_lane1 = sshr.bind(I64); - narrow.legalize( - def!(z = sshr_vector(x, y)), - vec![ - // Use scalar operations to shift the first lane. - def!(a = extractlane(x, uimm8_zero)), - def!(b = sshr_scalar_lane0(a, y)), - def!(c = insertlane(x, b, uimm8_zero)), - // Do the same for the second lane. - def!(d = extractlane(x, uimm8_one)), - def!(e = sshr_scalar_lane1(d, y)), - def!(z = insertlane(c, e, uimm8_one)), - ], - ); - } - - // SIMD select - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c - narrow.legalize( - def!(d = bitselect(c, x, y)), - vec![ - def!(a = band(x, c)), - def!(b = band_not(y, c)), - def!(d = bor(a, b)), - ], - ); - } - - // SIMD vselect; replace with bitselect if BLEND* instructions are not available. - // This works, because each lane of boolean vector is filled with zeroes or ones. 
- for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let vselect = vselect.bind(vector(ty, sse_vector_size)); - let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(d = vselect(c, x, y)), - vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))], - ); - } - - // SIMD vany_true - let ne = Literal::enumerator_for(&imm.intcc, "ne"); - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let vany_true = vany_true.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(y = vany_true(x)), - vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))], - ); - } - - // SIMD vall_true - let eq = Literal::enumerator_for(&imm.intcc, "eq"); - for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - let vall_true = vall_true.bind(vector(ty, sse_vector_size)); - if ty.is_int() { - // In the common case (Wasm's integer-only all_true), we do not require a - // bitcast. - narrow.legalize( - def!(y = vall_true(x)), - vec![ - def!(a = vconst(u128_zeroes)), - def!(c = icmp(eq, x, a)), - def!(d = x86_ptest(c, c)), - def!(y = trueif(eq, d)), - ], - ); - } else { - // However, to support other types we must bitcast them to an integer vector to - // use icmp. 
- let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16); - let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size)); - narrow.legalize( - def!(y = vall_true(x)), - vec![ - def!(a = vconst(u128_zeroes)), - def!(b = raw_bitcast_to_int(x)), - def!(c = icmp(eq, b, a)), - def!(d = x86_ptest(c, c)), - def!(y = trueif(eq, d)), - ], - ); - } - } - - // SIMD icmp ne - let ne = Literal::enumerator_for(&imm.intcc, "ne"); - for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) { - let icmp_ = icmp.bind(vector(ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(ne, a, b)), - vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))], - ); - } - - // SIMD icmp greater-/less-than - let sgt = Literal::enumerator_for(&imm.intcc, "sgt"); - let ugt = Literal::enumerator_for(&imm.intcc, "ugt"); - let sge = Literal::enumerator_for(&imm.intcc, "sge"); - let uge = Literal::enumerator_for(&imm.intcc, "uge"); - let slt = Literal::enumerator_for(&imm.intcc, "slt"); - let ult = Literal::enumerator_for(&imm.intcc, "ult"); - let sle = Literal::enumerator_for(&imm.intcc, "sle"); - let ule = Literal::enumerator_for(&imm.intcc, "ule"); - for ty in &[I8, I16, I32] { - // greater-than - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(ugt, a, b)), - vec![ - def!(x = x86_pmaxu(a, b)), - def!(y = icmp(eq, x, b)), - def!(c = bnot(y)), - ], - ); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(sge, a, b)), - vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))], - ); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(c = icmp_(uge, a, b)), - vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))], - ); - - // less-than - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - 
narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]); - let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]); - } - - // SIMD integer min/max - for ty in &[I8, I16, I32] { - let imin = imin.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]); - let umin = umin.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]); - let imax = imax.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]); - let umax = umax.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]); - } - - // SIMD fcmp greater-/less-than - let gt = Literal::enumerator_for(&imm.floatcc, "gt"); - let lt = Literal::enumerator_for(&imm.floatcc, "lt"); - let ge = Literal::enumerator_for(&imm.floatcc, "ge"); - let le = Literal::enumerator_for(&imm.floatcc, "le"); - let ugt = Literal::enumerator_for(&imm.floatcc, "ugt"); - let ult = Literal::enumerator_for(&imm.floatcc, "ult"); - let uge = Literal::enumerator_for(&imm.floatcc, "uge"); - let ule = Literal::enumerator_for(&imm.floatcc, "ule"); - for ty in &[F32, F64] { - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]); - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]); - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]); - let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); - narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]); - } - - for ty in 
&[F32, F64] { - let fneg = fneg.bind(vector(*ty, sse_vector_size)); - let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); - let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1); - let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); - let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = fneg(a)), - vec![ - def!(c = vconst(u128_ones)), - def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB. - def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. - def!(b = bxor(a, e)), // Flip the MSB. - ], - ); - } - - // SIMD fabs - for ty in &[F32, F64] { - let fabs = fabs.bind(vector(*ty, sse_vector_size)); - let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); - let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); - let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = fabs(a)), - vec![ - def!(c = vconst(u128_ones)), - def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB. - def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. - def!(b = band(a, e)), // Unset the MSB. 
- ], - ); - } - - // SIMD widen - for ty in &[I8, I16] { - let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = swiden_high(a)), - vec![ - def!(c = x86_palignr(a, a, uimm8_eight)), - def!(b = swiden_low(c)), - ], - ); - let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size)); - narrow.legalize( - def!(b = uwiden_high(a)), - vec![ - def!(c = x86_palignr(a, a, uimm8_eight)), - def!(b = uwiden_low(c)), - ], - ); - } - - narrow.custom_legalize(shuffle, "convert_shuffle"); - narrow.custom_legalize(extractlane, "convert_extractlane"); - narrow.custom_legalize(insertlane, "convert_insertlane"); - narrow.custom_legalize(ineg, "convert_ineg"); - narrow.custom_legalize(ushr, "convert_ushr"); - narrow.custom_legalize(ishl, "convert_ishl"); - narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector"); - narrow.custom_legalize(fmin, "expand_minmax_vector"); - narrow.custom_legalize(fmax, "expand_minmax_vector"); - - narrow_avx.custom_legalize(imul, "convert_i64x2_imul"); - narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector"); - narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector"); -} diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs index 26c833a77f..7c3e4c6877 100644 --- a/cranelift/codegen/meta/src/isa/x86/mod.rs +++ b/cranelift/codegen/meta/src/isa/x86/mod.rs @@ -1,87 +1,25 @@ -use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; use crate::cdsl::isa::TargetIsa; -use crate::cdsl::types::{ReferenceType, VectorType}; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::IsaRegsBuilder; -use crate::shared::types::Bool::B1; -use crate::shared::types::Float::{F32, F64}; -use crate::shared::types::Int::{I16, I32, I64, I8}; -use crate::shared::types::Reference::{R32, R64}; use crate::shared::Definitions as SharedDefinitions; -mod encodings; -mod 
instructions; -mod legalize; -mod opcodes; -mod recipes; -mod registers; pub(crate) mod settings; pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = settings::define(&shared_defs.settings); - let regs = registers::define(); - let inst_group = instructions::define( - &mut shared_defs.all_instructions, - &shared_defs.formats, - &shared_defs.imm, - &shared_defs.entities, - ); - legalize::define(shared_defs, &inst_group); + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); - // CPU modes for 32-bit and 64-bit operations. - let mut x86_64 = CpuMode::new("I64"); - let mut x86_32 = CpuMode::new("I32"); - - let expand_flags = shared_defs.transform_groups.by_name("expand_flags"); - let x86_widen = shared_defs.transform_groups.by_name("x86_widen"); - let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow"); - let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx"); - let x86_expand = shared_defs.transform_groups.by_name("x86_expand"); - - x86_32.legalize_monomorphic(expand_flags); - x86_32.legalize_default(x86_narrow); - x86_32.legalize_type(B1, expand_flags); - x86_32.legalize_type(I8, x86_widen); - x86_32.legalize_type(I16, x86_widen); - x86_32.legalize_type(I32, x86_expand); - x86_32.legalize_value_type(ReferenceType(R32), x86_expand); - x86_32.legalize_type(F32, x86_expand); - x86_32.legalize_type(F64, x86_expand); - x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); - x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); - x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); - - x86_64.legalize_monomorphic(expand_flags); - x86_64.legalize_default(x86_narrow); - x86_64.legalize_type(B1, expand_flags); - x86_64.legalize_type(I8, x86_widen); - x86_64.legalize_type(I16, x86_widen); - x86_64.legalize_type(I32, x86_expand); - x86_64.legalize_type(I64, x86_expand); - 
x86_64.legalize_value_type(ReferenceType(R64), x86_expand); - x86_64.legalize_type(F32, x86_expand); - x86_64.legalize_type(F64, x86_expand); - x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); - x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); - x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); - - let recipes = recipes::define(shared_defs, &settings, &regs); - - let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes); - x86_32.set_encodings(encodings.enc32); - x86_64.set_encodings(encodings.enc64); - let encodings_predicates = encodings.inst_pred_reg.extract(); - - let recipes = encodings.recipes; - - let cpu_modes = vec![x86_64, x86_32]; + let cpu_modes = vec![]; TargetIsa::new( "x86", settings, - regs, - recipes, + IsaRegsBuilder::new().build(), + Recipes::new(), cpu_modes, - encodings_predicates, + InstructionPredicateMap::new(), ) } diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs deleted file mode 100644 index 2e72a1744d..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs +++ /dev/null @@ -1,721 +0,0 @@ -//! Static, named definitions of instruction opcodes. - -/// Empty opcode for use as a default. -pub static EMPTY: [u8; 0] = []; - -/// Add with carry flag r{16,32,64} to r/m of the same size. -pub static ADC: [u8; 1] = [0x11]; - -/// Add r{16,32,64} to r/m of the same size. -pub static ADD: [u8; 1] = [0x01]; - -/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended. -pub static ADD_IMM: [u8; 1] = [0x81]; - -/// Add sign-extended imm8 to r/m{16,32,64}. -pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in -/// xmm1 (SSE2). -pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58]; - -/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in -/// xmm1 (SSE).
-pub static ADDPS: [u8; 2] = [0x0f, 0x58]; - -/// Add the low double-precision floating-point value from xmm2/mem to xmm1 -/// and store the result in xmm1. -pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58]; - -/// Add the low single-precision floating-point value from xmm2/mem to xmm1 -/// and store the result in xmm1. -pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58]; - -/// r/m{16,32,64} AND register of the same size (Intel docs have a typo). -pub static AND: [u8; 1] = [0x21]; - -/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended. -pub static AND_IMM: [u8; 1] = [0x81]; - -/// r/m{16,32,64} AND sign-extended imm8. -pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// Return the bitwise logical AND NOT of packed single-precision floating-point -/// values in xmm1 and xmm2/mem. -pub static ANDNPS: [u8; 2] = [0x0f, 0x55]; - -/// Return the bitwise logical AND of packed single-precision floating-point values -/// in xmm1 and xmm2/mem. -pub static ANDPS: [u8; 2] = [0x0f, 0x54]; - -/// Bit scan forward (stores index of first encountered 1 from the front). -pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc]; - -/// Bit scan reverse (stores index of first encountered 1 from the back). -pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd]; - -/// Select packed single-precision floating-point values from xmm1 and xmm2/m128 -/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). -pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14]; - -/// Select packed double-precision floating-point values from xmm1 and xmm2/m128 -/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). -pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15]; - -/// Call near, relative, displacement relative to next instruction (sign-extended). -pub static CALL_RELATIVE: [u8; 1] = [0xe8]; - -/// Move r/m{16,32,64} if overflow (OF=1). -pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40]; - -/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64). 
-pub static CMP_IMM: [u8; 1] = [0x81]; - -/// Compare imm8 with r/m{16,32,64}. -pub static CMP_IMM8: [u8; 1] = [0x83]; - -/// Compare r{16,32,64} with r/m of the same size. -pub static CMP_REG: [u8; 1] = [0x39]; - -/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of -/// imm8 as comparison predicate (SSE2). -pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2]; - -/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of -/// imm8 as comparison predicate (SSE). -pub static CMPPS: [u8; 2] = [0x0f, 0xc2]; - -/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision -/// floating-point values in xmm1 (SSE2). -pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b]; - -/// Convert scalar double-precision floating-point value to scalar single-precision -/// floating-point value. -pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a]; - -/// Convert doubleword integer to scalar double-precision floating-point value. -pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a]; - -/// Convert doubleword integer to scalar single-precision floating-point value. -pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a]; - -/// Convert scalar single-precision floating-point value to scalar double-precision -/// float-point value. -pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a]; - -/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed -/// doubleword values in xmm1 using truncation (SSE2). -pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b]; - -/// Convert with truncation scalar double-precision floating-point value to signed -/// integer. -pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c]; - -/// Convert with truncation scalar single-precision floating-point value to integer. -pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c]; - -/// Unsigned divide for {16,32,64}-bit. 
-pub static DIV: [u8; 1] = [0xf7]; - -/// Divide packed double-precision floating-point values in xmm1 by packed double-precision -/// floating-point values in xmm2/mem (SSE2). -pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e]; - -/// Divide packed single-precision floating-point values in xmm1 by packed single-precision -/// floating-point values in xmm2/mem (SSE). -pub static DIVPS: [u8; 2] = [0x0f, 0x5e]; - -/// Divide low double-precision floating-point value in xmm1 by low double-precision -/// floating-point value in xmm2/m64. -pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e]; - -/// Divide low single-precision floating-point value in xmm1 by low single-precision -/// floating-point value in xmm2/m32. -pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e]; - -/// Signed divide for {16,32,64}-bit. -pub static IDIV: [u8; 1] = [0xf7]; - -/// Signed multiply for {16,32,64}-bit, generic registers. -pub static IMUL: [u8; 2] = [0x0f, 0xaf]; - -/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX. -pub static IMUL_RDX_RAX: [u8; 1] = [0xf7]; - -/// Insert scalar single-precision floating-point value. -pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21]; - -/// Either: -/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory. -/// 2. Jump far, absolute indirect, address given in m16:64. -pub static JUMP_ABSOLUTE: [u8; 1] = [0xff]; - -/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits. -pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9]; - -/// Jump near (rel32) if overflow (OF=1). -pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80]; - -/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits. -pub static JUMP_SHORT: [u8; 1] = [0xeb]; - -/// Jump short (rel8) if equal (ZF=1). -pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74]; - -/// Jump short (rel8) if not equal (ZF=0). -pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75]; - -/// Jump short (rel8) if overflow (OF=1). 
-pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70]; - -/// Store effective address for m in register r{16,32,64}. -pub static LEA: [u8; 1] = [0x8d]; - -/// Count the number of leading zero bits. -pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd]; - -/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE2). -pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f]; - -/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE). -pub static MAXPS: [u8; 2] = [0x0f, 0x5f]; - -/// Return the maximum scalar double-precision floating-point value between -/// xmm2/m64 and xmm1. -pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f]; - -/// Return the maximum scalar single-precision floating-point value between -/// xmm2/m32 and xmm1. -pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f]; - -/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE2). -pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d]; - -/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128 -/// (SSE). -pub static MINPS: [u8; 2] = [0x0f, 0x5d]; - -/// Return the minimum scalar double-precision floating-point value between -/// xmm2/m64 and xmm1. -pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d]; - -/// Return the minimum scalar single-precision floating-point value between -/// xmm2/m32 and xmm1. -pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d]; - -/// Move r8 to r/m8. -pub static MOV_BYTE_STORE: [u8; 1] = [0x88]; - -/// Move imm{16,32,64} to same-sized register. -pub static MOV_IMM: [u8; 1] = [0xb8]; - -/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target. -pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7]; - -/// Move {r/m16, r/m32, r/m64} to same-sized register. -pub static MOV_LOAD: [u8; 1] = [0x8b]; - -/// Move r16 to r/m16. -pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89]; - -/// Move {r16, r32, r64} to same-sized register or memory. 
-pub static MOV_STORE: [u8; 1] = [0x89]; - -/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE). -pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28]; - -/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix. -pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e]; - -/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix. -pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e]; - -/// Move packed single-precision floating-point values low to high (SSE). -pub static MOVLHPS: [u8; 2] = [0x0f, 0x16]; - -/// Move scalar double-precision floating-point value (from reg/mem to reg). -pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10]; - -/// Move scalar double-precision floating-point value (from reg to reg/mem). -pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11]; - -/// Move scalar single-precision floating-point value (from reg to reg/mem). -pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11]; - -/// Move scalar single-precision floating-point-value (from reg/mem to reg). -pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10]; - -/// Move byte to register with sign-extension. -pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe]; - -/// Move word to register with sign-extension. -pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf]; - -/// Move doubleword to register with sign-extension. -pub static MOVSXD: [u8; 1] = [0x63]; - -/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE). -pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10]; - -/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE). -pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11]; - -/// Move byte to register with zero-extension. -pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6]; - -/// Move word to register with zero-extension. -pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7]; - -/// Unsigned multiply for {16,32,64}-bit. 
-pub static MUL: [u8; 1] = [0xf7]; - -/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result -/// in xmm1 (SSE2). -pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59]; - -/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result -/// in xmm1 (SSE). -pub static MULPS: [u8; 2] = [0x0f, 0x59]; - -/// Multiply the low double-precision floating-point value in xmm2/m64 by the -/// low double-precision floating-point value in xmm1. -pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59]; - -/// Multiply the low single-precision floating-point value in xmm2/m32 by the -/// low single-precision floating-point value in xmm1. -pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59]; - -/// Reverse each bit of r/m{16,32,64}. -pub static NOT: [u8; 1] = [0xf7]; - -/// r{16,32,64} OR register of same size. -pub static OR: [u8; 1] = [0x09]; - -/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended. -pub static OR_IMM: [u8; 1] = [0x81]; - -/// r/m{16,32,64} OR sign-extended imm8. -pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE). -pub static ORPS: [u8; 2] = [0x0f, 0x56]; - -/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3). -pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c]; - -/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in -/// xmm1 (SSSE3). -pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e]; - -/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in -/// xmm1 (SSSE3). -pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d]; - -/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte -/// integers in xmm1 using signed saturation (SSE2). 
-pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63]; - -/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed -/// word integers in xmm1 using signed saturation (SSE2). -pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b]; - -/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte -/// integers in xmm1 using unsigned saturation (SSE2). -pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67]; - -/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed -/// word integers in xmm1 using unsigned saturation (SSE4.1). -pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b]; - -/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc]; - -/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe]; - -/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4]; - -/// Add packed word integers from xmm2/m128 and xmm1 (SSE2). -pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd]; - -/// Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec]; - -/// Add packed signed word integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed]; - -/// Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc]; - -/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE). -pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd]; - -/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is -/// shifted to the right by the constant number of bytes in imm8 (SSSE3). 
-pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f]; - -/// Bitwise AND of xmm2/m128 and xmm1 (SSE2). -pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb]; - -/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2). -pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf]; - -/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2). -pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0]; - -/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2). -pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3]; - -/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte -/// in XMM0 and store the values into xmm1 (SSE4.1). -pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10]; - -/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1 -/// (SSE4.1). -pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e]; - -/// Compare packed data for equal (SSE2). -pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74]; - -/// Compare packed data for equal (SSE2). -pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76]; - -/// Compare packed data for equal (SSE4.1). -pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29]; - -/// Compare packed data for equal (SSE2). -pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75]; - -/// Compare packed signed byte integers for greater than (SSE2). -pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64]; - -/// Compare packed signed doubleword integers for greater than (SSE2). -pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66]; - -/// Compare packed signed quadword integers for greater than (SSE4.2). -pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37]; - -/// Compare packed signed word integers for greater than (SSE2). -pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65]; - -/// Extract doubleword or quadword, depending on REX.W (SSE4.1). -pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16]; - -/// Extract byte (SSE4.1). 
-pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14]; - -/// Extract word (SSE4.1). There is a 3-byte SSE2 variant that can also move to m/16. -pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15]; - -/// Insert doubleword or quadword, depending on REX.W (SSE4.1). -pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22]; - -/// Insert byte (SSE4.1). -pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20]; - -/// Insert word (SSE2). -pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4]; - -/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE4.1). -pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c]; - -/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum -/// values in xmm1 (SSE4.1). -pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d]; - -/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE2). -pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee]; - -/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE2). -pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde]; - -/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum -/// values in xmm1 (SSE4.1). -pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f]; - -/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in -/// xmm1 (SSE4.1). -pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e]; - -/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE4.1). -pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38]; - -/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum -/// values in xmm1 (SSE4.1). 
-pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39]; - -/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE2). -pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea]; - -/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE2). -pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda]; - -/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum -/// values in xmm1 (SSE4.1). -pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b]; - -/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in -/// xmm1 (SSE4.1). -pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a]; - -/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20]; - -/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23]; - -/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25]; - -/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30]; - -/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33]; - -/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit -/// integers in xmm1 (SSE4.1). -pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35]; - -/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of -/// the results in xmm1 (SSE2). 
-pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5]; - -/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32 -/// bits of each product in xmm1 (SSE4.1). -pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; - -/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64 -/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding. -pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; - -/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers -/// in xmm2/m128, and store the quadword results in xmm1 (SSE2). -pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4]; - -/// Multiply the packed word integers, add adjacent doubleword results. -pub static PMADDWD: [u8; 3] = [0x66, 0x0f, 0xf5]; - -/// Pop top of stack into r{16,32,64}; increment stack pointer. -pub static POP_REG: [u8; 1] = [0x58]; - -/// Returns the count of number of bits set to 1. -pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8]; - -/// Bitwise OR of xmm2/m128 and xmm1 (SSE2). -pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb]; - -/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSE3). -pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00]; - -/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and -/// store the result in xmm1 (SSE2). -pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70]; - -/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR -/// digit used in the ModR/M byte (SSE2). -pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71]; - -/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR -/// digit used in the ModR/M byte (SSE2). -pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72]; - -/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR -/// digit used in the ModR/M byte (SSE2). 
-pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73]; - -/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). -pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1]; - -/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). -pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2]; - -/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). -pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3]; - -/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). -pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1]; - -/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). -pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2]; - -/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). -pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3]; - -/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). -pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1]; - -/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). -pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2]; - -/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2). -pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8]; - -/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2). -pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9]; - -/// Subtract packed doubleword integers in xmm2/m128 from doubleword byte integers in xmm1 (SSE2). -pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa]; - -/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2). -pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb]; - -/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8]; - -/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1 -/// and saturate results (SSE2). 
-pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9]; - -/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8]; - -/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1 -/// and saturate results (SSE2). -pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9]; - -/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all -/// 0s (SSE4.1). -pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17]; - -/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68]; - -/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69]; - -/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A]; - -/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D]; - -/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60]; - -/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61]; - -/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62]; - -/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). -pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C]; - -/// Push r{16,32,64}. -pub static PUSH_REG: [u8; 1] = [0x50]; - -/// Logical exclusive OR (SSE2). -pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef]; - -/// Near return to calling procedure. 
-pub static RET_NEAR: [u8; 1] = [0xc3]; - -/// General rotation opcode. Kind of rotation depends on encoding. -pub static ROTATE_CL: [u8; 1] = [0xd3]; - -/// General rotation opcode. Kind of rotation depends on encoding. -pub static ROTATE_IMM8: [u8; 1] = [0xc1]; - -/// Round scalar doubl-precision floating-point values. -pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b]; - -/// Round scalar single-precision floating-point values. -pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a]; - -/// Subtract with borrow r{16,32,64} from r/m of the same size. -pub static SBB: [u8; 1] = [0x19]; - -/// Set byte if overflow (OF=1). -pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90]; - -/// Compute the square root of the packed double-precision floating-point values and store the -/// result in xmm1 (SSE2). -pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51]; - -/// Compute the square root of the packed double-precision floating-point values and store the -/// result in xmm1 (SSE). -pub static SQRTPS: [u8; 2] = [0x0f, 0x51]; - -/// Compute square root of scalar double-precision floating-point value. -pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51]; - -/// Compute square root of scalar single-precision value. -pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51]; - -/// Subtract r{16,32,64} from r/m of same size. -pub static SUB: [u8; 1] = [0x29]; - -/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result -/// in xmm1 (SSE2). -pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c]; - -/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result -/// in xmm1 (SSE). -pub static SUBPS: [u8; 2] = [0x0f, 0x5c]; - -/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1 -/// and store the result in xmm1. -pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c]; - -/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1 -/// and store the result in xmm1. 
-pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c]; - -/// AND r8 with r/m8; set SF, ZF, PF according to result. -pub static TEST_BYTE_REG: [u8; 1] = [0x84]; - -/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result. -pub static TEST_REG: [u8; 1] = [0x85]; - -/// Count the number of trailing zero bits. -pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc]; - -/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64 -/// and set the EFLAGS flags accordingly. -pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e]; - -/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32 -/// and set the EFLAGS flags accordingly. -pub static UCOMISS: [u8; 2] = [0x0f, 0x2e]; - -/// Raise invalid opcode instruction. -pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b]; - -/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed -/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior -/// is controlled by MXCSR but can be overriden by EVEX.L'L in static rounding mode -/// (AVX512VL, AVX512F). -pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a]; - -/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended. -pub static XOR_IMM: [u8; 1] = [0x81]; - -/// r/m{16,32,64} XOR sign-extended imm8. -pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; - -/// r/m{16,32,64} XOR register of the same size. -pub static XOR: [u8; 1] = [0x31]; - -/// Bitwise logical XOR of packed double-precision floating-point values. -pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57]; - -/// Bitwise logical XOR of packed single-precision floating-point values. -pub static XORPS: [u8; 2] = [0x0f, 0x57]; diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs deleted file mode 100644 index f45f8dc673..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ /dev/null @@ -1,3445 +0,0 @@ -//! Encoding recipes for x86/x86_64. 
-use std::rc::Rc; - -use cranelift_codegen_shared::isa::x86::EncodingBits; - -use crate::cdsl::ast::Literal; -use crate::cdsl::formats::InstructionFormat; -use crate::cdsl::instructions::InstructionPredicate; -use crate::cdsl::recipes::{ - EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack, -}; -use crate::cdsl::regs::IsaRegs; -use crate::cdsl::settings::SettingGroup; -use crate::shared::Definitions as SharedDefinitions; - -use crate::isa::x86::opcodes; - -/// Helper data structure to create recipes and template recipes. -/// It contains all the recipes and recipe templates that might be used in the encodings crate of -/// this same directory. -pub(crate) struct RecipeGroup<'builder> { - /// Memoized registers description, to pass it to builders later. - regs: &'builder IsaRegs, - - /// All the recipes explicitly created in this file. This is different from the final set of - /// recipes, which is definitive only once encodings have generated new recipes on the fly. - recipes: Vec, - - /// All the recipe templates created in this file. 
- templates: Vec>>, -} - -impl<'builder> RecipeGroup<'builder> { - fn new(regs: &'builder IsaRegs) -> Self { - Self { - regs, - recipes: Vec::new(), - templates: Vec::new(), - } - } - fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) { - self.recipes.push(recipe.build()); - } - fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc> { - let template = Rc::new(Template::new(recipe, self.regs)); - self.templates.push(template.clone()); - template - } - fn add_template_inferred( - &mut self, - recipe: EncodingRecipeBuilder, - infer_function: &'static str, - ) -> Rc> { - let template = - Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function)); - self.templates.push(template.clone()); - template - } - fn add_template(&mut self, template: Template<'builder>) -> Rc> { - let template = Rc::new(template); - self.templates.push(template.clone()); - template - } - pub fn recipe(&self, name: &str) -> &EncodingRecipe { - self.recipes - .iter() - .find(|recipe| recipe.name == name) - .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name)) - } - pub fn template(&self, name: &str) -> &Template { - self.templates - .iter() - .find(|recipe| recipe.name() == name) - .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name)) - } -} - -// Opcode representation. -// -// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are -// variable length, so we use separate recipes for different styles of opcodes and prefixes. The -// opcode format is indicated by the recipe name prefix. -// -// The match case below does not include the REX prefix which goes after the mandatory prefix. -// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are -// represented by separate recipes. -// -// The encoding bits are: -// -// 0-7: The opcode byte . 
-// 8-9: pp, mandatory prefix: -// 00 none (Op*) -// 01 66 (Mp*) -// 10 F3 (Mp*) -// 11 F2 (Mp*) -// 10-11: mm, opcode map: -// 00 (Op1/Mp1) -// 01 0F (Op2/Mp2) -// 10 0F 38 (Op3/Mp3) -// 11 0F 3A (Op3/Mp3) -// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes. -// 15: REX.W bit (or VEX.W/E) -// -// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and -// the pp+mm format is ready for supporting VEX prefixes. -// -// TODO Cranelift doesn't actually require recipe to have different encoding sizes anymore, so this -// could be simplified. - -/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits. -fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) { - let enc = EncodingBits::new(op_bytes, rrr, w); - (enc.prefix().recipe_name_prefix(), enc.bits()) -} - -/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the -/// corresponding `put_*` function from the `binemit.rs` module. -fn replace_put_op(code: Option, prefix: &str) -> Option { - code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase()))) -} - -/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class. 
-fn replace_nonrex_constraints( - regs: &IsaRegs, - constraints: Vec, -) -> Vec { - constraints - .into_iter() - .map(|constraint| match constraint { - OperandConstraint::RegClass(rc_index) => { - let new_rc_index = if rc_index == regs.class_by_name("GPR") { - regs.class_by_name("GPR8") - } else if rc_index == regs.class_by_name("FPR") { - regs.class_by_name("FPR8") - } else { - rc_index - }; - OperandConstraint::RegClass(new_rc_index) - } - _ => constraint, - }) - .collect() -} - -fn replace_evex_constraints( - _: &IsaRegs, - constraints: Vec, -) -> Vec { - constraints - .into_iter() - .map(|constraint| match constraint { - OperandConstraint::RegClass(rc_index) => { - // FIXME(#1306) this should be able to upgrade the register class to FPR32 as in - // `replace_nonrex_constraints` above, e.g. When FPR32 is re-added, add back in the - // rc_index conversion to FPR32. In the meantime, this is effectively a no-op - // conversion--the register class stays the same. - OperandConstraint::RegClass(rc_index) - } - _ => constraint, - }) - .collect() -} - -/// Specifies how the prefix (e.g. REX) is emitted by a Recipe. -#[derive(Copy, Clone, PartialEq)] -pub enum RecipePrefixKind { - /// The REX emission behavior is not hardcoded for the Recipe - /// and may be overridden when using the Template. - Unspecified, - - /// The Recipe must hardcode the non-emission of the REX prefix. - NeverEmitRex, - - /// The Recipe must hardcode the emission of the REX prefix. - AlwaysEmitRex, - - /// The Recipe should infer the emission of the REX.RXB bits from registers, - /// and the REX.W bit from the EncodingBits. - /// - /// Because such a Recipe has a non-constant instruction size, it must have - /// a special `compute_size` handler for the inferrable-REX case. - InferRex, - - /// The Recipe must hardcode the emission of an EVEX prefix. 
- Evex, -} - -impl Default for RecipePrefixKind { - fn default() -> Self { - Self::Unspecified - } -} - -/// Previously called a TailRecipe in the Python meta language, this allows to create multiple -/// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different -/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating -/// Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and could be -/// reconsidered later. -#[derive(Clone)] -pub(crate) struct Template<'builder> { - /// Description of registers, used in the build() method. - regs: &'builder IsaRegs, - - /// The recipe template, which is to be specialized (by copy). - recipe: EncodingRecipeBuilder, - - /// How is the REX prefix emitted? - rex_kind: RecipePrefixKind, - - /// Function for `compute_size()` when REX is inferrable. - inferred_rex_compute_size: Option<&'static str>, - - /// Other recipe to use when REX-prefixed. - when_prefixed: Option>>, - - // Parameters passed in the EncodingBits. - /// Value of the W bit (0 or 1), stored in the EncodingBits. - w_bit: u16, - /// Value of the RRR bits (between 0 and 0b111). - rrr_bits: u16, - /// Opcode bytes. 
- op_bytes: &'static [u8], -} - -impl<'builder> Template<'builder> { - fn new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self { - Self { - regs, - recipe, - rex_kind: RecipePrefixKind::default(), - inferred_rex_compute_size: None, - when_prefixed: None, - w_bit: 0, - rrr_bits: 0, - op_bytes: &opcodes::EMPTY, - } - } - - fn name(&self) -> &str { - &self.recipe.name - } - fn rex_kind(self, kind: RecipePrefixKind) -> Self { - Self { - rex_kind: kind, - ..self - } - } - fn inferred_rex_compute_size(self, function: &'static str) -> Self { - Self { - inferred_rex_compute_size: Some(function), - ..self - } - } - fn when_prefixed(self, template: Rc>) -> Self { - assert!(self.when_prefixed.is_none()); - Self { - when_prefixed: Some(template), - ..self - } - } - - // Copy setters. - pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self { - assert!(!op_bytes.is_empty()); - let mut copy = self.clone(); - copy.op_bytes = op_bytes; - copy - } - pub fn w(&self) -> Self { - let mut copy = self.clone(); - copy.w_bit = 1; - copy - } - pub fn rrr(&self, value: u16) -> Self { - assert!(value <= 0b111); - let mut copy = self.clone(); - copy.rrr_bits = value; - copy - } - pub fn nonrex(&self) -> Self { - assert!( - self.rex_kind != RecipePrefixKind::AlwaysEmitRex, - "Template requires REX prefix." - ); - let mut copy = self.clone(); - copy.rex_kind = RecipePrefixKind::NeverEmitRex; - copy - } - pub fn rex(&self) -> Self { - assert!( - self.rex_kind != RecipePrefixKind::NeverEmitRex, - "Template requires no REX prefix." - ); - if let Some(prefixed) = &self.when_prefixed { - let mut ret = prefixed.rex(); - // Forward specialized parameters. - ret.op_bytes = self.op_bytes; - ret.w_bit = self.w_bit; - ret.rrr_bits = self.rrr_bits; - return ret; - } - let mut copy = self.clone(); - copy.rex_kind = RecipePrefixKind::AlwaysEmitRex; - copy - } - pub fn infer_rex(&self) -> Self { - assert!( - self.rex_kind != RecipePrefixKind::NeverEmitRex, - "Template requires no REX prefix." 
- ); - assert!( - self.when_prefixed.is_none(), - "infer_rex used with when_prefixed()." - ); - let mut copy = self.clone(); - copy.rex_kind = RecipePrefixKind::InferRex; - copy - } - - pub fn build(mut self) -> (EncodingRecipe, u16) { - let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); - - let (recipe_name, size_addendum) = match self.rex_kind { - RecipePrefixKind::Unspecified | RecipePrefixKind::NeverEmitRex => { - // Ensure the operands are limited to non-REX constraints. - let operands_in = self.recipe.operands_in.unwrap_or_default(); - self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); - let operands_out = self.recipe.operands_out.unwrap_or_default(); - self.recipe.operands_out = - Some(replace_nonrex_constraints(self.regs, operands_out)); - - (opcode.into(), self.op_bytes.len() as u64) - } - RecipePrefixKind::AlwaysEmitRex => { - ("Rex".to_string() + opcode, self.op_bytes.len() as u64 + 1) - } - RecipePrefixKind::InferRex => { - assert_eq!(self.w_bit, 0, "A REX.W bit always requires a REX prefix; avoid using `infer_rex().w()` and use `rex().w()` instead."); - // Hook up the right function for inferred compute_size(). - assert!( - self.inferred_rex_compute_size.is_some(), - "InferRex recipe '{}' needs an inferred_rex_compute_size function.", - &self.recipe.name - ); - self.recipe.compute_size = self.inferred_rex_compute_size; - - ("DynRex".to_string() + opcode, self.op_bytes.len() as u64) - } - RecipePrefixKind::Evex => { - // Allow the operands to expand limits to EVEX constraints. 
- let operands_in = self.recipe.operands_in.unwrap_or_default(); - self.recipe.operands_in = Some(replace_evex_constraints(self.regs, operands_in)); - let operands_out = self.recipe.operands_out.unwrap_or_default(); - self.recipe.operands_out = Some(replace_evex_constraints(self.regs, operands_out)); - - ("Evex".to_string() + opcode, 4 + 1) - } - }; - - self.recipe.base_size += size_addendum; - - // Branch ranges are relative to the end of the instruction. - // For InferRex, the range should be the minimum, assuming no REX. - if let Some(range) = self.recipe.branch_range.as_mut() { - range.inst_size += size_addendum; - } - - self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name); - self.recipe.name = recipe_name + &self.recipe.name; - - (self.recipe.build(), bits) - } -} - -/// Returns a predicate checking that the "cond" field of the instruction contains one of the -/// directly supported floating point condition codes. -fn supported_floatccs_predicate( - supported_cc: &[Literal], - format: &InstructionFormat, -) -> InstructionPredicate { - supported_cc - .iter() - .fold(InstructionPredicate::new(), |pred, literal| { - pred.or(InstructionPredicate::new_is_field_equal( - format, - "cond", - literal.to_rust_code(), - )) - }) -} - -/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte. -fn valid_scale(format: &InstructionFormat) -> InstructionPredicate { - ["1", "2", "4", "8"] - .iter() - .fold(InstructionPredicate::new(), |pred, &literal| { - pred.or(InstructionPredicate::new_is_field_equal( - format, - "imm", - literal.into(), - )) - }) -} - -pub(crate) fn define<'shared>( - shared_defs: &'shared SharedDefinitions, - settings: &'shared SettingGroup, - regs: &'shared IsaRegs, -) -> RecipeGroup<'shared> { - // The set of floating point condition codes that are directly supported. - // Other condition codes need to be reversed or expressed as two tests. 
- let floatcc = &shared_defs.imm.floatcc; - let supported_floatccs: Vec = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"] - .iter() - .map(|name| Literal::enumerator_for(floatcc, name)) - .collect(); - - // Register classes shorthands. - let abcd = regs.class_by_name("ABCD"); - let gpr = regs.class_by_name("GPR"); - let fpr = regs.class_by_name("FPR"); - let flag = regs.class_by_name("FLAG"); - - // Operand constraints shorthands. - let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags")); - let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax")); - let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx")); - let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx")); - let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15")); - let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0")); - - // Stack operand with a 32-bit signed displacement from either RBP or RSP. - let stack_gpr32 = Stack::new(gpr); - let stack_fpr32 = Stack::new(fpr); - - let formats = &shared_defs.formats; - - // Predicates shorthands. - let use_sse41 = settings.predicate_by_name("use_sse41"); - - // Definitions. - let mut recipes = RecipeGroup::new(regs); - - // A null unary instruction that takes a GPR register. Can be used for identity copies and - // no-op conversions. - recipes.add_recipe( - EncodingRecipeBuilder::new("null", &formats.unary, 0) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .emit(""), - ); - recipes.add_recipe( - EncodingRecipeBuilder::new("null_fpr", &formats.unary, 0) - .operands_in(vec![fpr]) - .operands_out(vec![0]) - .emit(""), - ); - recipes.add_recipe( - EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![stack_gpr32]) - .emit(""), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("get_pinned_reg", &formats.nullary, 0) - .operands_out(vec![reg_r15]) - .emit(""), - ); - // umr with a fixed register output that's r15. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("set_pinned_reg", &formats.unary, 1) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - let r15 = RU::r15.into(); - {{PUT_OP}}(bits, rex2(r15, in_reg0), sink); - modrm_rr(r15, in_reg0, sink); - "#, - ), - ); - - // No-op fills, created by late-stage redundant-fill removal. - recipes.add_recipe( - EncodingRecipeBuilder::new("fillnull", &formats.unary, 0) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit(""), - ); - recipes.add_recipe( - EncodingRecipeBuilder::new("ffillnull", &formats.unary, 0) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit(""), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("debugtrap", &formats.nullary, 1).emit("sink.put1(0xcc);"), - ); - - // XX opcode, no ModR/M. - recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", &formats.trap, 0).emit( - r#" - sink.trap(code, func.srclocs[inst]); - {{PUT_OP}}(bits, BASE_REX, sink); - "#, - )); - - // Macro: conditional jump over a ud2. - recipes.add_recipe( - EncodingRecipeBuilder::new("trapif", &formats.int_cond_trap, 4) - .operands_in(vec![reg_rflags]) - .clobbers_flags(false) - .emit( - r#" - // Jump over a 2-byte ud2. - sink.put1(0x70 | (icc2opc(cond.inverse()) as u8)); - sink.put1(2); - // ud2. - sink.trap(code, func.srclocs[inst]); - sink.put1(0x0f); - sink.put1(0x0b); - "#, - ), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("trapff", &formats.float_cond_trap, 4) - .operands_in(vec![reg_rflags]) - .clobbers_flags(false) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.float_cond_trap, - )) - .emit( - r#" - // Jump over a 2-byte ud2. - sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); - sink.put1(2); - // ud2. 
- sink.trap(code, func.srclocs[inst]); - sink.put1(0x0f); - sink.put1(0x0b); - "#, - ), - ); - - // XX /r - recipes.add_template_inferred( - EncodingRecipeBuilder::new("rr", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with operands swapped. (RM form). - recipes.add_template_inferred( - EncodingRecipeBuilder::new("rrx", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with FPR ins and outs. A form. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fa", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with FPR ins and outs. A form with input operands swapped. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fax", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![1]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - // The operand order does not matter for calculating whether a REX prefix is needed. - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - // XX /r with FPR ins and outs. A form with a byte immediate. 
- { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.ternary_imm8, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - } - - // XX /n for a unary operation with extension bits. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("ur", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - // XX /r, but for a unary operator with separate input/output register, like - // copies. MR form, preserving flags. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("umr", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), - ); - - // Same as umr, but with FPR -> GPR registers. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rfumr", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); - "#, - ), - ); - - // Same as umr, but with the source register specified directly. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("umr_reg_to_ssa", &formats.copy_to_ssa, 1) - // No operands_in to mention, because a source register is specified directly. 
- .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, src), sink); - modrm_rr(out_reg0, src, sink); - "#, - ), - ); - - // XX /r, but for a unary operator with separate input/output register. - // RM form. Clobbers FLAGS. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("urm", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /r. Same as urm, but doesn't clobber FLAGS. - let urm_noflags = recipes.add_template_recipe( - EncodingRecipeBuilder::new("urm_noflags", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /r. Same as urm_noflags, but input limited to ABCD. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("urm_noflags_abcd", &formats.unary, 1) - .operands_in(vec![abcd]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - regs, - ) - .when_prefixed(urm_noflags), - ); - - // XX /r, RM form, FPR -> FPR. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("furm", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_outreg0", - ); - - // Same as furm, but with the source register specified directly. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("furm_reg_to_ssa", &formats.copy_to_ssa, 1) - // No operands_in to mention, because a source register is specified directly. 
- .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(src, out_reg0), sink); - modrm_rr(src, out_reg0, sink); - "#, - ), - ); - - // XX /r, RM form, GPR -> FPR. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("frurm", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_outreg0", - ); - - // XX /r, RM form, FPR -> GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rfurm", &formats.unary, 1) - .operands_in(vec![fpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /r, RMI form for one of the roundXX SSE 4.1 instructions. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("furmi_rnd", &formats.unary, 2) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .isa_predicate(use_sse41) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - sink.put1(match opcode { - Opcode::Nearest => 0b00, - Opcode::Floor => 0b01, - Opcode::Ceil => 0b10, - Opcode::Trunc => 0b11, - x => panic!("{} unexpected for furmi_rnd", opcode), - }); - "#, - ), - ); - - // XX /r, for regmove instructions. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rmov", &formats.reg_move, 1) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(dst, src), sink); - modrm_rr(dst, src, sink); - "#, - ), - ); - - // XX /r, for regmove instructions (FPR version, RM encoded). 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("frmov", &formats.reg_move, 1) - .operands_in(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(src, dst), sink); - modrm_rr(src, dst, sink); - "#, - ), - ); - - // XX /n with one arg in %rcx, for shifts. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rc", &formats.binary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rcx), - ]) - .operands_out(vec![0]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), - ); - - // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("div", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::FixedReg(reg_rdx), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![reg_rax, reg_rdx]) - .emit( - r#" - sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); - {{PUT_OP}}(bits, rex1(in_reg2), sink); - modrm_r_bits(in_reg2, bits, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"), - ); - - // XX /n for {s,u}mulx: inputs in %rax, r. 
Outputs in %rdx(hi):%rax(lo) - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("mulx", &formats.binary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::RegClass(gpr), - ]) - .operands_out(vec![ - OperandConstraint::FixedReg(reg_rax), - OperandConstraint::FixedReg(reg_rdx), - ]) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg1), sink); - modrm_r_bits(in_reg1, bits, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"), - ); - - // XX /r for BLEND* instructions - recipes.add_template_inferred( - EncodingRecipeBuilder::new("blend", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_xmm0), - OperandConstraint::RegClass(fpr), - OperandConstraint::RegClass(fpr), - ]) - .operands_out(vec![2]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink); - modrm_rr(in_reg1, in_reg2, sink); - "#, - ), - "size_with_inferred_rex_for_inreg1_inreg2", - ); - - // XX /n ib with 8-bit immediate sign-extended. 
- { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0", - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2) - .operands_in(vec![fpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0", - ); - - // XX /n id with 32-bit immediate sign-extended. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5) - .operands_in(vec![gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.binary_imm64, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - } - - // XX /r ib with 8-bit unsigned immediate (e.g. 
for pshufd) - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2) - .operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.binary_imm8, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0_outreg0", - ); - } - - // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane) - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2) - .operands_in(vec![fpr]) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.binary_imm8, "imm", 8, 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), "size_with_inferred_rex_for_inreg0_outreg0" - ); - } - - // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane) - { - recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2) - .operands_in(vec![fpr, gpr]) - .operands_out(vec![0]) - .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.ternary_imm8, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - } - - { - // XX /n id with 32-bit immediate sign-extended. UnaryImm version. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("u_id", &formats.unary_imm, 5) - .operands_out(vec![gpr]) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.unary_imm, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - ); - } - - // XX+rd id unary with 32-bit immediate. Note no recipe predicate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_id", &formats.unary_imm, 4) - .operands_out(vec![gpr]) - .emit( - r#" - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - ); - - // XX+rd id unary with bool immediate. Note no recipe predicate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_id_bool", &formats.unary_bool, 4) - .operands_out(vec![gpr]) - .emit( - r#" - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: u32 = if imm { 1 } else { 0 }; - sink.put4(imm); - "#, - ), - ); - - // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_id_ref", &formats.nullary, 4) - .operands_out(vec![gpr]) - .emit( - r#" - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq unary with 64-bit immediate. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pu_iq", &formats.unary_imm, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: i64 = imm.into(); - sink.put8(imm as u64); - "#, - ), - ); - - // XX+rd id unary with zero immediate. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("u_id_z", &formats.unary_imm, 1) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - ); - - // XX /n Unary with floating point 32-bit immediate equal to zero. - { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("f32imm_z", &formats.unary_ieee32, 1) - .operands_out(vec![fpr]) - .inst_predicate(InstructionPredicate::new_is_zero_32bit_float( - &*formats.unary_ieee32, - "imm", - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - ); - } - - // XX /n Unary with floating point 64-bit immediate equal to zero. - { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("f64imm_z", &formats.unary_ieee64, 1) - .operands_out(vec![fpr]) - .inst_predicate(InstructionPredicate::new_is_zero_64bit_float( - &*formats.unary_ieee64, - "imm", - )) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - ); - } - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pushq", &formats.unary, 0) - .operands_in(vec![gpr]) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("popq", &formats.nullary, 0) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - "#, - ), - ); - - // XX /r, for regmove instructions. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("copysp", &formats.copy_special, 1) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(dst, src), sink); - modrm_rr(dst, src, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("adjustsp", &formats.unary, 1) - .operands_in(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink); - modrm_rr(RU::rsp.into(), in_reg0, sink); - "#, - ), - ); - - { - recipes.add_template_recipe( - EncodingRecipeBuilder::new("adjustsp_ib", &formats.unary_imm, 2) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.unary_imm, - "imm", - 8, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); - modrm_r_bits(RU::rsp.into(), bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("adjustsp_id", &formats.unary_imm, 5) - .inst_predicate(InstructionPredicate::new_is_signed_int( - &*formats.unary_imm, - "imm", - 32, - 0, - )) - .emit( - r#" - {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); - modrm_r_bits(RU::rsp.into(), bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - ); - } - - recipes.add_recipe( - EncodingRecipeBuilder::new("dummy_sarg_t", &formats.nullary, 0) - .operands_out(vec![Stack::new(gpr)]) - .emit(""), - ); - - // XX+rd id with Abs4 function relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fnaddr4", &formats.func_addr, 4) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs4, - &func.dfg.ext_funcs[func_ref].name, - 0); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq with Abs8 function relocation. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fnaddr8", &formats.func_addr, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs8, - &func.dfg.ext_funcs[func_ref].name, - 0); - sink.put8(0); - "#, - ), - ); - - // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). - recipes.add_template_recipe( - EncodingRecipeBuilder::new("allones_fnaddr4", &formats.func_addr, 4) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs4, - &func.dfg.ext_funcs[func_ref].name, - 0); - // Write the immediate as `!0` for the benefit of BaldrMonkey. - sink.put4(!0); - "#, - ), - ); - - // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). - recipes.add_template_recipe( - EncodingRecipeBuilder::new("allones_fnaddr8", &formats.func_addr, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs8, - &func.dfg.ext_funcs[func_ref].name, - 0); - // Write the immediate as `!0` for the benefit of BaldrMonkey. - sink.put8(!0); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pcrel_fnaddr8", &formats.func_addr, 5) - .operands_out(vec![gpr]) - // rex2 gets passed 0 for r/m register because the upper bit of - // r/m doesn't get decoded when in rip-relative addressing mode. - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. 
- sink.reloc_external(func.srclocs[inst], - Reloc::X86PCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("got_fnaddr8", &formats.func_addr, 5) - .operands_out(vec![gpr]) - // rex2 gets passed 0 for r/m register because the upper bit of - // r/m doesn't get decoded when in rip-relative addressing mode. - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86GOTPCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - "#, - ), - ); - - // XX+rd id with Abs4 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("gvaddr4", &formats.unary_global_value, 4) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs4, - &func.global_values[global_value].symbol_name(), - 0); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq with Abs8 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("gvaddr8", &formats.unary_global_value, 8) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(func.srclocs[inst], - Reloc::Abs8, - &func.global_values[global_value].symbol_name(), - 0); - sink.put8(0); - "#, - ), - ); - - // XX+rd iq with PCRel4 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("pcrel_gvaddr8", &formats.unary_global_value, 5) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_rm(5, out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. 
- sink.reloc_external(func.srclocs[inst], - Reloc::X86PCRel4, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - "#, - ), - ); - - // XX+rd iq with Abs8 globalsym relocation. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("got_gvaddr8", &formats.unary_global_value, 5) - .operands_out(vec![gpr]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_rm(5, out_reg0, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86GOTPCRel4, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - "#, - ), - ); - - // Stack addresses. - // - // TODO Alternative forms for 8-bit immediates, when applicable. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6) - .operands_out(vec![gpr]) - .emit( - r#" - let sp = StackRef::sp(stack_slot, &func.stack_slots); - let base = stk_base(sp.base); - {{PUT_OP}}(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - let imm : i32 = offset.into(); - sink.put4(sp.offset.checked_add(imm).unwrap() as u32); - "#, - ), - ); - - // Constant addresses. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("const_addr", &formats.unary_const, 5) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - const_disp4(constant_handle, func, sink); - "#, - ), - ); - - // Store recipes. - - { - // Simple stores. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.store, "offset", "0".into()); - - // XX /r register-indirect store with no offset. 
- let st = recipes.add_template_recipe( - EncodingRecipeBuilder::new("st", &formats.store, 1) - .operands_in(vec![gpr, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - "#, - ), - ); - - // XX /r register-indirect store with no offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("st_abcd", &formats.store, 1) - .operands_in(vec![abcd, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - "#, - ), - regs, - ) - .when_prefixed(st), - ); - - // XX /r register-indirect store of FPR with no offset. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fst", &formats.store, 1) - .operands_in(vec![fpr, gpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - "#, - ), - "size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1", - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.store, "offset", 8, 0); - - // XX /r register-indirect store with 8-bit offset. - let st_disp8 = recipes.add_template_recipe( - EncodingRecipeBuilder::new("stDisp8", &formats.store, 2) - .operands_in(vec![gpr, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r register-indirect store with 8-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. 
- recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("stDisp8_abcd", &formats.store, 2) - .operands_in(vec![abcd, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - regs, - ) - .when_prefixed(st_disp8), - ); - - // XX /r register-indirect store with 8-bit offset of FPR. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2) - .operands_in(vec![fpr, gpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", - ); - - // XX /r register-indirect store with 32-bit offset. 
- let st_disp32 = recipes.add_template_recipe( - EncodingRecipeBuilder::new("stDisp32", &formats.store, 5) - .operands_in(vec![gpr, gpr]) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r register-indirect store with 32-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5) - .operands_in(vec![abcd, gpr]) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - regs, - ) - .when_prefixed(st_disp32), - ); - - // XX /r register-indirect store with 32-bit offset of FPR. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5) - .operands_in(vec![fpr, gpr]) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", - ); - } - - { - // Complex stores. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.store_complex, "offset", "0".into()); - - // XX /r register-indirect store with index and no offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndex", &formats.store_complex, 2) - .operands_in(vec![gpr, gpr, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - "#, - ), - ); - - // XX /r register-indirect store with index and no offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndex_abcd", &formats.store_complex, 2) - .operands_in(vec![abcd, gpr, gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - "#, - ), - ); - - // XX /r register-indirect store with index and no offset of FPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fstWithIndex", &formats.store_complex, 2) - .operands_in(vec![fpr, gpr, gpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - "#, - ), - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 8, 0); - - // XX /r register-indirect store with index and 8-bit offset. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp8", &formats.store_complex, 3) - .operands_in(vec![gpr, gpr, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r register-indirect store with index and 8-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", &formats.store_complex, 3) - .operands_in(vec![abcd, gpr, gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r register-indirect store with index and 8-bit offset of FPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fstWithIndexDisp8", &formats.store_complex, 3) - .operands_in(vec![fpr, gpr, gpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 32, 0); - - // XX /r register-indirect store with index and 32-bit offset. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp32", &formats.store_complex, 6) - .operands_in(vec![gpr, gpr, gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r register-indirect store with index and 32-bit offset. - // Only ABCD allowed for stored value. This is for byte stores with no REX. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", &formats.store_complex, 6) - .operands_in(vec![abcd, gpr, gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r register-indirect store with index and 32-bit offset of FPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fstWithIndexDisp32", &formats.store_complex, 6) - .operands_in(vec![fpr, gpr, gpr]) - .inst_predicate(has_big_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - } - - // Unary spill with SIB and 32-bit displacement. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("spillSib32", &formats.unary, 6) - .operands_in(vec![gpr]) - .operands_out(vec![stack_gpr32]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let base = stk_base(out_stk0.base); - {{PUT_OP}}(bits, rex2(base, in_reg0), sink); - modrm_sib_disp32(in_reg0, sink); - sib_noindex(base, sink); - sink.put4(out_stk0.offset as u32); - "#, - ), - ); - - // Like spillSib32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fspillSib32", &formats.unary, 6) - .operands_in(vec![fpr]) - .operands_out(vec![stack_fpr32]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let base = stk_base(out_stk0.base); - {{PUT_OP}}(bits, rex2(base, in_reg0), sink); - modrm_sib_disp32(in_reg0, sink); - sib_noindex(base, sink); - sink.put4(out_stk0.offset as u32); - "#, - ), - ); - - // Regspill using RSP-relative addressing. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("regspill32", &formats.reg_spill, 6) - .operands_in(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let dst = StackRef::sp(dst, &func.stack_slots); - let base = stk_base(dst.base); - {{PUT_OP}}(bits, rex2(base, src), sink); - modrm_sib_disp32(src, sink); - sib_noindex(base, sink); - sink.put4(dst.offset as u32); - "#, - ), - ); - - // Like regspill32, but targeting an FPR rather than a GPR. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fregspill32", &formats.reg_spill, 6) - .operands_in(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let dst = StackRef::sp(dst, &func.stack_slots); - let base = stk_base(dst.base); - {{PUT_OP}}(bits, rex2(base, src), sink); - modrm_sib_disp32(src, sink); - sib_noindex(base, sink); - sink.put4(dst.offset as u32); - "#, - ), - ); - - // Load recipes. - - { - // Simple loads. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.load, "offset", "0".into()); - - // XX /r load with no offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ld", &formats.load, 1) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else if needs_offset(in_reg0) { - modrm_disp8(in_reg0, out_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg0, out_reg0, sink); - } - "#, - ), - ); - - // XX /r float load with no offset. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fld", &formats.load, 1) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else if needs_offset(in_reg0) { - modrm_disp8(in_reg0, out_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg0, out_reg0, sink); - } - "#, - ), - "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 8, 0); - - // XX /r load with 8-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldDisp8", &formats.load, 2) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp8(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r float load with 8-bit offset. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fldDisp8", &formats.load, 2) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp8(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 32, 0); - - // XX /r load with 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldDisp32", &formats.load, 5) - .operands_in(vec![gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp32(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp32(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r float load with 32-bit offset. 
- recipes.add_template_inferred( - EncodingRecipeBuilder::new("fldDisp32", &formats.load, 5) - .operands_in(vec![gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_big_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_sib_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp32(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp32(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", - ); - } - - { - // Complex loads. - - // A predicate asking if the offset is zero. - let has_no_offset = - InstructionPredicate::new_is_field_equal(&*formats.load_complex, "offset", "0".into()); - - // XX /r load with index and no offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldWithIndex", &formats.load_complex, 2) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_no_offset.clone()) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - } - "#, - ), - ); - - // XX /r float load with index and no offset. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fldWithIndex", &formats.load_complex, 2) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_no_offset) - .clobbers_flags(false) - .compute_size("size_plus_maybe_offset_for_inreg_0") - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - } - "#, - ), - ); - - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 8, 0); - - // XX /r load with index and 8-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldWithIndexDisp8", &formats.load_complex, 3) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_small_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - // XX /r float load with 8-bit offset. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fldWithIndexDisp8", &formats.load_complex, 3) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_small_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - "#, - ), - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 32, 0); - - // XX /r load with index and 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ldWithIndexDisp32", &formats.load_complex, 6) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .inst_predicate(has_big_offset.clone()) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp32(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - - // XX /r float load with index and 32-bit offset. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fldWithIndexDisp32", &formats.load_complex, 6) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![fpr]) - .inst_predicate(has_big_offset) - .clobbers_flags(false) - .emit( - r#" - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp32(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - "#, - ), - ); - } - - // Unary fill with SIB and 32-bit displacement. 
- recipes.add_template_recipe( - EncodingRecipeBuilder::new("fillSib32", &formats.unary, 6) - .operands_in(vec![stack_gpr32]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - let base = stk_base(in_stk0.base); - {{PUT_OP}}(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - sink.put4(in_stk0.offset as u32); - "#, - ), - ); - - // Like fillSib32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ffillSib32", &formats.unary, 6) - .operands_in(vec![stack_fpr32]) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - let base = stk_base(in_stk0.base); - {{PUT_OP}}(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - sink.put4(in_stk0.offset as u32); - "#, - ), - ); - - // Regfill with RSP-relative 32-bit displacement. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("regfill32", &formats.reg_fill, 6) - .operands_in(vec![stack_gpr32]) - .clobbers_flags(false) - .emit( - r#" - let src = StackRef::sp(src, &func.stack_slots); - let base = stk_base(src.base); - {{PUT_OP}}(bits, rex2(base, dst), sink); - modrm_sib_disp32(dst, sink); - sib_noindex(base, sink); - sink.put4(src.offset as u32); - "#, - ), - ); - - // Like regfill32, but targeting an FPR rather than a GPR. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fregfill32", &formats.reg_fill, 6) - .operands_in(vec![stack_fpr32]) - .clobbers_flags(false) - .emit( - r#" - let src = StackRef::sp(src, &func.stack_slots); - let base = stk_base(src.base); - {{PUT_OP}}(bits, rex2(base, dst), sink); - modrm_sib_disp32(dst, sink); - sib_noindex(base, sink); - sink.put4(src.offset as u32); - "#, - ), - ); - - // Call/return. 
- - recipes.add_template_recipe( - EncodingRecipeBuilder::new("call_id", &formats.call, 4).emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits, BASE_REX, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. - sink.reloc_external(func.srclocs[inst], - Reloc::X86CallPCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - sink.add_call_site(opcode, func.srclocs[inst]); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("call_plt_id", &formats.call, 4).emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits, BASE_REX, sink); - sink.reloc_external(func.srclocs[inst], - Reloc::X86CallPLTRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - sink.add_call_site(opcode, func.srclocs[inst]); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("call_r", &formats.call_indirect, 1) - .operands_in(vec![gpr]) - .emit( - r#" - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - sink.add_call_site(opcode, func.srclocs[inst]); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("ret", &formats.multiary, 0) - .emit("{{PUT_OP}}(bits, BASE_REX, sink);"), - ); - - // Branches. 
- - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jmpb", &formats.jump, 1) - .branch_range((1, 8)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, BASE_REX, sink); - disp1(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jmpd", &formats.jump, 4) - .branch_range((4, 32)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, BASE_REX, sink); - disp4(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brib", &formats.branch_int, 1) - .operands_in(vec![reg_rflags]) - .branch_range((1, 8)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); - disp1(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brid", &formats.branch_int, 4) - .operands_in(vec![reg_rflags]) - .branch_range((4, 32)) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); - disp4(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brfb", &formats.branch_float, 1) - .operands_in(vec![reg_rflags]) - .branch_range((1, 8)) - .clobbers_flags(false) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.branch_float, - )) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); - disp1(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("brfd", &formats.branch_float, 4) - .operands_in(vec![reg_rflags]) - .branch_range((4, 32)) - .clobbers_flags(false) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.branch_float, - )) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); - disp4(destination, func, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("indirect_jmp", &formats.indirect_jump, 1) - .operands_in(vec![gpr]) - 
.clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jt_entry", &formats.branch_table_entry, 2) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .inst_predicate(valid_scale(&*formats.branch_table_entry)) - .compute_size("size_plus_maybe_offset_for_inreg_1") - .emit( - r#" - {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink); - if needs_offset(in_reg1) { - modrm_sib_disp8(out_reg0, sink); - sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); - } - "#, - ), - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - const_disp4(constant_handle, func, sink); - "#, - ), - "size_with_inferred_rex_for_outreg0", - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1) - .operands_out(vec![fpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_outreg0", - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("jt_base", &formats.branch_table_base, 5) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - - // No reloc is needed here as the jump table is emitted directly after - // the function body. - jt_disp4(table, func, sink); - "#, - ), - ); - - // Test flags and set a register. - // - // These setCC instructions only set the low 8 bits, and they can only write ABCD registers - // without a REX prefix. 
- // - // Other instruction encodings accepting `b1` inputs have the same constraints and only look at - // the low 8 bits of the input register. - - let seti = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("seti", &formats.int_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("seti_abcd", &formats.int_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![abcd]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .when_prefixed(seti), - ); - - let setf = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("setf", &formats.float_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![gpr]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("setf_abcd", &formats.float_cond, 1) - .operands_in(vec![reg_rflags]) - .operands_out(vec![abcd]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - "#, - ), - regs, - ) - .when_prefixed(setf), - ); - - // Conditional move (a.k.a integer select) - // (maybe-REX.W) 0F 4x modrm(r,r) - // 1 byte, modrm(r,r), is after the opcode - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("cmov", &formats.int_select, 1) - .operands_in(vec![ - OperandConstraint::FixedReg(reg_rflags), - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - ]) 
- .operands_out(vec![2]) - .clobbers_flags(false) - .emit( - r#" - {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink); - modrm_rr(in_reg1, in_reg2, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"), - ); - - // Bit scan forwards and reverse - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), - ); - - // Arithematic with flag I/O. - - // XX /r, MR form. Add two GPR registers and set carry flag. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rout", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![ - OperandConstraint::TiedInput(0), - OperandConstraint::FixedReg(reg_rflags), - ]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // XX /r, MR form. Add two GPR registers and get carry flag. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rin", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .operands_out(vec![0]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // XX /r, MR form. Add two GPR registers with carry flag. 
- recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rio", &formats.ternary, 1) - .operands_in(vec![ - OperandConstraint::RegClass(gpr), - OperandConstraint::RegClass(gpr), - OperandConstraint::FixedReg(reg_rflags), - ]) - .operands_out(vec![ - OperandConstraint::TiedInput(0), - OperandConstraint::FixedReg(reg_rflags), - ]) - .clobbers_flags(true) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // Compare and set flags. - - // XX /r, MR form. Compare two GPR registers and set flags. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rcmp", &formats.binary, 1) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - // Same as rcmp, but second operand is the stack pointer. - recipes.add_template_recipe( - EncodingRecipeBuilder::new("rcmp_sp", &formats.unary, 1) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink); - modrm_rr(in_reg0, RU::rsp.into(), sink); - "#, - ), - ); - - // XX /r, RM form. Compare two FPR registers and set flags. - recipes.add_template_inferred( - EncodingRecipeBuilder::new("fcmp", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![reg_rflags]) - .emit( - r#" - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - { - let has_small_offset = - InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0); - - // XX /n, MI form with imm8. 
- recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .inst_predicate(has_small_offset) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - let has_big_offset = - InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0); - - // XX /n, MI form with imm32. - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5) - .operands_in(vec![gpr]) - .operands_out(vec![reg_rflags]) - .inst_predicate(has_big_offset) - .emit( - r#" - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - } - - // Test-and-branch. - // - // This recipe represents the macro fusion of a test and a conditional branch. - // This serves two purposes: - // - // 1. Guarantee that the test and branch get scheduled next to each other so - // macro fusion is guaranteed to be possible. - // 2. Hide the status flags from Cranelift which doesn't currently model flags. - // - // The encoding bits affect both the test and the branch instruction: - // - // Bits 0-7 are the Jcc opcode. - // Bits 8-15 control the test instruction which always has opcode byte 0x85. - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2) - .operands_in(vec![gpr]) - .branch_range((3, 8)) - .emit( - r#" - // test r, r. - {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. 
- sink.put1(bits as u8); - disp1(destination, func, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6) - .operands_in(vec![gpr]) - .branch_range((7, 32)) - .emit( - r#" - // test r, r. - {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - // 8-bit test-and-branch. - - let t8jccb = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccb", &formats.branch, 1 + 2) - .operands_in(vec![gpr]) - .branch_range((3, 8)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(bits as u8); - disp1(destination, func, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccb_abcd", &formats.branch, 1 + 2) - .operands_in(vec![abcd]) - .branch_range((3, 8)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(bits as u8); - disp1(destination, func, sink); - "#, - ), - regs, - ) - .when_prefixed(t8jccb), - ); - - let t8jccd = recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccd", &formats.branch, 1 + 6) - .operands_in(vec![gpr]) - .branch_range((7, 32)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. 
- sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), - regs, - ) - .rex_kind(RecipePrefixKind::AlwaysEmitRex), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("t8jccd_abcd", &formats.branch, 1 + 6) - .operands_in(vec![abcd]) - .branch_range((7, 32)) - .emit( - r#" - // test8 r, r. - {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Jcc instruction. - sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), - regs, - ) - .when_prefixed(t8jccd), - ); - - // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode. - // The register allocator can't handle a branch instruction with constrained - // operands like the t8jccd_abcd above. This variant can accept the b1 opernd in - // any register, but is is larger because it uses a 32-bit test instruction with - // a 0xff immediate. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("t8jccd_long", &formats.branch, 5 + 6) - .operands_in(vec![gpr]) - .branch_range((11, 32)) - .emit( - r#" - // test32 r, 0xff. - {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - sink.put4(0xff); - // Jcc instruction. - sink.put1(0x0f); - sink.put1(bits as u8); - disp4(destination, func, sink); - "#, - ), - ); - - // Comparison that produces a `b1` result in a GPR. - // - // This is a macro of a `cmp` instruction followed by a `setCC` instruction. - // - // TODO This is not a great solution because: - // - // - The cmp+setcc combination is not recognized by CPU's macro fusion. - // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC` - // instructions may need a REX independently. - // - Modeling CPU flags in the type system would be better. 
- // - // Since the `setCC` instructions only write an 8-bit register, we use that as - // our `b1` representation: A `b1` value is represented as a GPR where the low 8 - // bits are known to be 0 or 1. The high bits are undefined. - // - // This bandaid macro doesn't support a REX prefix for the final `setCC` - // instruction, so it is limited to the `ABCD` register class for booleans. - // The omission of a `when_prefixed` alternative is deliberate here. - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3) - .operands_in(vec![gpr, gpr]) - .operands_out(vec![abcd]) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - // `setCC` instruction, no REX. - let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), - ); - - recipes.add_template_inferred( - EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - - { - let is_small_imm = - InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .inst_predicate(is_small_imm) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - // `setCC` instruction, no REX. 
- let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - - let is_big_imm = - InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .inst_predicate(is_big_imm) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - // `setCC` instruction, no REX. - let setcc = 0x90 | icc2opc(cond); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - regs, - ) - .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), - ); - } - - // Make a FloatCompare instruction predicate with the supported condition codes. - // - // Same thing for floating point. - // - // The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this: - // - // ZPC OSA - // UN 111 000 - // GT 000 000 - // LT 001 000 - // EQ 100 000 - // - // Not all floating point condition codes are supported. - // The omission of a `when_prefixed` alternative is deliberate here. - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("fcscc", &formats.float_compare, 1 + 3) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![abcd]) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs, - &*formats.float_compare, - )) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - // `setCC` instruction, no REX. 
- use crate::ir::condcodes::FloatCC::*; - let setcc = match cond { - Ordered => 0x9b, // EQ|LT|GT => setnp (P=0) - Unordered => 0x9a, // UN => setp (P=1) - OrderedNotEqual => 0x95, // LT|GT => setne (Z=0), - UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1) - GreaterThan => 0x97, // GT => seta (C=0&Z=0) - GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0) - UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1) - UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1) - Equal | // EQ - NotEqual | // UN|LT|GT - LessThan | // LT - LessThanOrEqual | // LT|EQ - UnorderedOrGreaterThan | // UN|GT - UnorderedOrGreaterThanOrEqual // UN|GT|EQ - => panic!("{} not supported by fcscc", cond), - }; - sink.put1(0x0f); - sink.put1(setcc); - modrm_rr(out_reg0, 0, sink); - "#, - ), - ); - - { - let supported_floatccs: Vec = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"] - .iter() - .map(|name| Literal::enumerator_for(floatcc, name)) - .collect(); - recipes.add_template_inferred( - EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![0]) - .inst_predicate(supported_floatccs_predicate( - &supported_floatccs[..], - &*formats.float_compare, - )) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - // Add immediate byte indicating what type of comparison. - use crate::ir::condcodes::FloatCC::*; - let imm = match cond { - Equal => 0x00, - LessThan => 0x01, - LessThanOrEqual => 0x02, - Unordered => 0x03, - NotEqual => 0x04, - UnorderedOrGreaterThanOrEqual => 0x05, - UnorderedOrGreaterThan => 0x06, - Ordered => 0x07, - _ => panic!("{} not supported by pfcmp", cond), - }; - sink.put1(imm); - "#, - ), - "size_with_inferred_rex_for_inreg0_inreg1", - ); - } - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .emit( - r#" - // Test instruction. 
- {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink); - modrm_rr(in_reg0, in_reg0, sink); - // Check ZF = 1 flag to see if register holds 0. - sink.put1(0x0f); - sink.put1(0x94); - modrm_rr(out_reg0, 0, sink); - "#, - ), - ); - - recipes.add_template_recipe( - EncodingRecipeBuilder::new("is_invalid", &formats.unary, 2 + 3) - .operands_in(vec![gpr]) - .operands_out(vec![abcd]) - .emit( - r#" - // Comparison instruction. - {{PUT_OP}}(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - sink.put1(0xff); - // `setCC` instruction, no REX. - use crate::ir::condcodes::IntCC::*; - let setcc = 0x90 | icc2opc(Equal); - sink.put1(0x0f); - sink.put1(setcc as u8); - modrm_rr(out_reg0, 0, sink); - "#, - ), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("safepoint", &formats.multiary, 0).emit( - r#" - sink.add_stack_map(args, func, isa); - "#, - ), - ); - - // Both `elf_tls_get_addr` and `macho_tls_get_addr` require all caller-saved registers to be spilled. - // This is currently special cased in `regalloc/spilling.rs` in the `visit_inst` function. - - recipes.add_recipe( - EncodingRecipeBuilder::new("elf_tls_get_addr", &formats.unary_global_value, 16) - // FIXME Correct encoding for non rax registers - .operands_out(vec![reg_rax]) - .emit( - r#" - // output %rax - // clobbers %rdi - - // Those data16 prefixes are necessary to pad to 16 bytes. 
- - // data16 lea gv@tlsgd(%rip),%rdi - sink.put1(0x66); // data16 - sink.put1(0b01001000); // rex.w - const LEA: u8 = 0x8d; - sink.put1(LEA); // lea - modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d - sink.reloc_external(func.srclocs[inst], - Reloc::ElfX86_64TlsGd, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - - // data16 data16 callq __tls_get_addr-4 - sink.put1(0x66); // data16 - sink.put1(0x66); // data16 - sink.put1(0b01001000); // rex.w - sink.put1(0xe8); // call - sink.reloc_external(func.srclocs[inst], - Reloc::X86CallPLTRel4, - &ExternalName::LibCall(LibCall::ElfTlsGetAddr), - -4); - sink.put4(0); - "#, - ), - ); - - recipes.add_recipe( - EncodingRecipeBuilder::new("macho_tls_get_addr", &formats.unary_global_value, 9) - // FIXME Correct encoding for non rax registers - .operands_out(vec![reg_rax]) - .emit( - r#" - // output %rax - // clobbers %rdi - - // movq gv@tlv(%rip), %rdi - sink.put1(0x48); // rex - sink.put1(0x8b); // mov - modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d - sink.reloc_external(func.srclocs[inst], - Reloc::MachOX86_64Tlv, - &func.global_values[global_value].symbol_name(), - -4); - sink.put4(0); - - // callq *(%rdi) - sink.put1(0xff); - sink.put1(0x17); - "#, - ), - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("evex_reg_vvvv_rm_128", &formats.binary, 1) - .operands_in(vec![fpr, fpr]) - .operands_out(vec![fpr]) - .emit( - r#" - // instruction encoding operands: reg (op1, w), vvvv (op2, r), rm (op3, r) - // this maps to: out_reg0, in_reg0, in_reg1 - let context = EvexContext::Other { length: EvexVectorLength::V128 }; - let masking = EvexMasking::None; - put_evex(bits, out_reg0, in_reg0, in_reg1, context, masking, sink); // params: reg, vvvv, rm - modrm_rr(in_reg1, out_reg0, sink); // params: rm, reg - "#, - ), - regs).rex_kind(RecipePrefixKind::Evex) - ); - - recipes.add_template( - Template::new( - EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1) - 
.operands_in(vec![fpr]) - .operands_out(vec![fpr]) - .emit( - r#" - // instruction encoding operands: reg (op1, w), rm (op2, r) - // this maps to: out_reg0, in_reg0 - let context = EvexContext::Other { length: EvexVectorLength::V128 }; - let masking = EvexMasking::None; - put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm - modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg - "#, - ), - regs).rex_kind(RecipePrefixKind::Evex) - ); - - recipes -} diff --git a/cranelift/codegen/meta/src/isa/x86/registers.rs b/cranelift/codegen/meta/src/isa/x86/registers.rs deleted file mode 100644 index 85a8965f89..0000000000 --- a/cranelift/codegen/meta/src/isa/x86/registers.rs +++ /dev/null @@ -1,43 +0,0 @@ -use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; - -pub(crate) fn define() -> IsaRegs { - let mut regs = IsaRegsBuilder::new(); - - let builder = RegBankBuilder::new("FloatRegs", "xmm") - .units(16) - .track_pressure(true); - let float_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("IntRegs", "r") - .units(16) - .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"]) - .track_pressure(true) - .pinned_reg(15); - let int_regs = regs.add_bank(builder); - - let builder = RegBankBuilder::new("FlagRegs", "") - .units(1) - .names(vec!["rflags"]) - .track_pressure(false); - let flag_reg = regs.add_bank(builder); - - let builder = RegClassBuilder::new_toplevel("GPR", int_regs); - let gpr = regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FPR", float_regs); - let fpr = regs.add_class(builder); - - let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); - regs.add_class(builder); - - let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8); - let gpr8 = regs.add_class(builder); - - let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4); - regs.add_class(builder); - - let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8); - 
regs.add_class(builder); - - regs.build() -} diff --git a/cranelift/codegen/shared/src/isa/mod.rs b/cranelift/codegen/shared/src/isa/mod.rs deleted file mode 100644 index 4d8e485f6c..0000000000 --- a/cranelift/codegen/shared/src/isa/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -//! Shared ISA-specific definitions. - -pub mod x86; diff --git a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs b/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs deleted file mode 100644 index 9edb2a6e6f..0000000000 --- a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs +++ /dev/null @@ -1,419 +0,0 @@ -//! Provides a named interface to the `u16` Encoding bits. - -use std::ops::RangeInclusive; - -/// Named interface to the `u16` Encoding bits, representing an opcode. -/// -/// Cranelift requires each recipe to have a single encoding size in bytes. -/// X86 opcodes are variable length, so we use separate recipes for different -/// styles of opcodes and prefixes. The opcode format is indicated by the -/// recipe name prefix. -/// -/// VEX/XOP and EVEX prefixes are not yet supported. -/// Encodings using any of these prefixes are represented by separate recipes. -/// -/// The encoding bits are: -/// -/// 0-7: The opcode byte . -/// 8-9: pp, mandatory prefix: -/// 00: none (Op*) -/// 01: 66 (Mp*) -/// 10: F3 (Mp*) -/// 11: F2 (Mp*) -/// 10-11: mm, opcode map: -/// 00: (Op1/Mp1) -/// 01: 0F (Op2/Mp2) -/// 10: 0F 38 (Op3/Mp3) -/// 11: 0F 3A (Op3/Mp3) -/// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes. -/// 15: REX.W bit (or VEX.W/E) -#[derive(Copy, Clone, PartialEq)] -pub struct EncodingBits(u16); -const OPCODE: RangeInclusive = 0..=7; -const OPCODE_PREFIX: RangeInclusive = 8..=11; // Includes pp and mm. -const RRR: RangeInclusive = 12..=14; -const REX_W: RangeInclusive = 15..=15; - -impl From for EncodingBits { - fn from(bits: u16) -> Self { - Self(bits) - } -} - -impl EncodingBits { - /// Constructs a new EncodingBits from parts. 
- pub fn new(op_bytes: &[u8], rrr: u16, rex_w: u16) -> Self { - assert!( - !op_bytes.is_empty(), - "op_bytes must include at least one opcode byte" - ); - let mut new = Self::from(0); - let last_byte = op_bytes[op_bytes.len() - 1]; - new.write(OPCODE, last_byte as u16); - let prefix: u8 = OpcodePrefix::from_opcode(op_bytes).into(); - new.write(OPCODE_PREFIX, prefix as u16); - new.write(RRR, rrr); - new.write(REX_W, rex_w); - new - } - - /// Returns a copy of the EncodingBits with the RRR bits set. - #[inline] - pub fn with_rrr(mut self, rrr: u8) -> Self { - debug_assert_eq!(self.rrr(), 0); - self.write(RRR, rrr.into()); - self - } - - /// Returns a copy of the EncodingBits with the REX.W bit set. - #[inline] - pub fn with_rex_w(mut self) -> Self { - debug_assert_eq!(self.rex_w(), 0); - self.write(REX_W, 1); - self - } - - /// Returns the raw bits. - #[inline] - pub fn bits(self) -> u16 { - self.0 - } - - /// Convenience method for writing bits to specific range. - #[inline] - fn write(&mut self, range: RangeInclusive, value: u16) { - assert!(ExactSizeIterator::len(&range) > 0); - let size = range.end() - range.start() + 1; // Calculate the number of bits in the range. - let mask = (1 << size) - 1; // Generate a bit mask. - debug_assert!( - value <= mask, - "The written value should have fewer than {} bits.", - size - ); - let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask. - self.0 &= mask_complement; // Clear the bits in `range`. - let value = (value & mask) << *range.start(); // Place the value in the correct location. - self.0 |= value; // Modify the bits in `range`. - } - - /// Convenience method for reading bits from a specific range. - #[inline] - fn read(self, range: RangeInclusive) -> u8 { - assert!(ExactSizeIterator::len(&range) > 0); - let size = range.end() - range.start() + 1; // Calculate the number of bits in the range. 
- debug_assert!(size <= 8, "This structure expects ranges of at most 8 bits"); - let mask = (1 << size) - 1; // Generate a bit mask. - ((self.0 >> *range.start()) & mask) as u8 - } - - /// Instruction opcode byte, without the prefix. - #[inline] - pub fn opcode_byte(self) -> u8 { - self.read(OPCODE) - } - - /// Prefix kind for the instruction, as an enum. - #[inline] - pub fn prefix(self) -> OpcodePrefix { - OpcodePrefix::from(self.read(OPCODE_PREFIX)) - } - - /// Extracts the PP bits of the OpcodePrefix. - #[inline] - pub fn pp(self) -> u8 { - self.prefix().to_primitive() & 0x3 - } - - /// Extracts the MM bits of the OpcodePrefix. - #[inline] - pub fn mm(self) -> u8 { - (self.prefix().to_primitive() >> 2) & 0x3 - } - - /// Bits for the ModR/M byte for certain opcodes. - #[inline] - pub fn rrr(self) -> u8 { - self.read(RRR) - } - - /// REX.W bit (or VEX.W/E). - #[inline] - pub fn rex_w(self) -> u8 { - self.read(REX_W) - } -} - -/// Opcode prefix representation. -/// -/// The prefix type occupies four of the EncodingBits. 
-#[allow(non_camel_case_types)] -#[allow(missing_docs)] -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub enum OpcodePrefix { - Op1, - Mp1_66, - Mp1_f3, - Mp1_f2, - Op2_0f, - Mp2_66_0f, - Mp2_f3_0f, - Mp2_f2_0f, - Op3_0f_38, - Mp3_66_0f_38, - Mp3_f3_0f_38, - Mp3_f2_0f_38, - Op3_0f_3a, - Mp3_66_0f_3a, - Mp3_f3_0f_3a, - Mp3_f2_0f_3a, -} - -impl From for OpcodePrefix { - fn from(n: u8) -> Self { - use OpcodePrefix::*; - match n { - 0b0000 => Op1, - 0b0001 => Mp1_66, - 0b0010 => Mp1_f3, - 0b0011 => Mp1_f2, - 0b0100 => Op2_0f, - 0b0101 => Mp2_66_0f, - 0b0110 => Mp2_f3_0f, - 0b0111 => Mp2_f2_0f, - 0b1000 => Op3_0f_38, - 0b1001 => Mp3_66_0f_38, - 0b1010 => Mp3_f3_0f_38, - 0b1011 => Mp3_f2_0f_38, - 0b1100 => Op3_0f_3a, - 0b1101 => Mp3_66_0f_3a, - 0b1110 => Mp3_f3_0f_3a, - 0b1111 => Mp3_f2_0f_3a, - _ => panic!("invalid opcode prefix"), - } - } -} - -impl Into for OpcodePrefix { - fn into(self) -> u8 { - use OpcodePrefix::*; - match self { - Op1 => 0b0000, - Mp1_66 => 0b0001, - Mp1_f3 => 0b0010, - Mp1_f2 => 0b0011, - Op2_0f => 0b0100, - Mp2_66_0f => 0b0101, - Mp2_f3_0f => 0b0110, - Mp2_f2_0f => 0b0111, - Op3_0f_38 => 0b1000, - Mp3_66_0f_38 => 0b1001, - Mp3_f3_0f_38 => 0b1010, - Mp3_f2_0f_38 => 0b1011, - Op3_0f_3a => 0b1100, - Mp3_66_0f_3a => 0b1101, - Mp3_f3_0f_3a => 0b1110, - Mp3_f2_0f_3a => 0b1111, - } - } -} - -impl OpcodePrefix { - /// Convert an opcode prefix to a `u8`; this is a convenience proxy for `Into`. - fn to_primitive(self) -> u8 { - self.into() - } - - /// Extracts the OpcodePrefix from the opcode. 
- pub fn from_opcode(op_bytes: &[u8]) -> Self { - assert!(!op_bytes.is_empty(), "at least one opcode byte"); - - let prefix_bytes = &op_bytes[..op_bytes.len() - 1]; - match prefix_bytes { - [] => Self::Op1, - [0x66] => Self::Mp1_66, - [0xf3] => Self::Mp1_f3, - [0xf2] => Self::Mp1_f2, - [0x0f] => Self::Op2_0f, - [0x66, 0x0f] => Self::Mp2_66_0f, - [0xf3, 0x0f] => Self::Mp2_f3_0f, - [0xf2, 0x0f] => Self::Mp2_f2_0f, - [0x0f, 0x38] => Self::Op3_0f_38, - [0x66, 0x0f, 0x38] => Self::Mp3_66_0f_38, - [0xf3, 0x0f, 0x38] => Self::Mp3_f3_0f_38, - [0xf2, 0x0f, 0x38] => Self::Mp3_f2_0f_38, - [0x0f, 0x3a] => Self::Op3_0f_3a, - [0x66, 0x0f, 0x3a] => Self::Mp3_66_0f_3a, - [0xf3, 0x0f, 0x3a] => Self::Mp3_f3_0f_3a, - [0xf2, 0x0f, 0x3a] => Self::Mp3_f2_0f_3a, - _ => { - panic!("unexpected opcode sequence: {:?}", op_bytes); - } - } - } - - /// Returns the recipe name prefix. - /// - /// At the moment, each similar OpcodePrefix group is given its own Recipe. - /// In order to distinguish them, this string is prefixed. - pub fn recipe_name_prefix(self) -> &'static str { - use OpcodePrefix::*; - match self { - Op1 => "Op1", - Op2_0f => "Op2", - Op3_0f_38 | Op3_0f_3a => "Op3", - Mp1_66 | Mp1_f3 | Mp1_f2 => "Mp1", - Mp2_66_0f | Mp2_f3_0f | Mp2_f2_0f => "Mp2", - Mp3_66_0f_38 | Mp3_f3_0f_38 | Mp3_f2_0f_38 => "Mp3", - Mp3_66_0f_3a | Mp3_f3_0f_3a | Mp3_f2_0f_3a => "Mp3", - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - /// Helper function for prefix_roundtrip() to avoid long lines. - fn test_roundtrip(p: OpcodePrefix) { - assert_eq!(p, OpcodePrefix::from(p.to_primitive())); - } - - /// Tests that to/from each opcode matches. 
- #[test] - fn prefix_roundtrip() { - test_roundtrip(OpcodePrefix::Op1); - test_roundtrip(OpcodePrefix::Mp1_66); - test_roundtrip(OpcodePrefix::Mp1_f3); - test_roundtrip(OpcodePrefix::Mp1_f2); - test_roundtrip(OpcodePrefix::Op2_0f); - test_roundtrip(OpcodePrefix::Mp2_66_0f); - test_roundtrip(OpcodePrefix::Mp2_f3_0f); - test_roundtrip(OpcodePrefix::Mp2_f2_0f); - test_roundtrip(OpcodePrefix::Op3_0f_38); - test_roundtrip(OpcodePrefix::Mp3_66_0f_38); - test_roundtrip(OpcodePrefix::Mp3_f3_0f_38); - test_roundtrip(OpcodePrefix::Mp3_f2_0f_38); - test_roundtrip(OpcodePrefix::Op3_0f_3a); - test_roundtrip(OpcodePrefix::Mp3_66_0f_3a); - test_roundtrip(OpcodePrefix::Mp3_f3_0f_3a); - test_roundtrip(OpcodePrefix::Mp3_f2_0f_3a); - } - - #[test] - fn prefix_to_name() { - assert_eq!(OpcodePrefix::Op1.recipe_name_prefix(), "Op1"); - assert_eq!(OpcodePrefix::Op2_0f.recipe_name_prefix(), "Op2"); - assert_eq!(OpcodePrefix::Op3_0f_38.recipe_name_prefix(), "Op3"); - assert_eq!(OpcodePrefix::Mp1_66.recipe_name_prefix(), "Mp1"); - assert_eq!(OpcodePrefix::Mp2_66_0f.recipe_name_prefix(), "Mp2"); - assert_eq!(OpcodePrefix::Mp3_66_0f_3a.recipe_name_prefix(), "Mp3"); - } - - /// Tests that the opcode_byte is the lower of the EncodingBits. - #[test] - fn encodingbits_opcode_byte() { - let enc = EncodingBits::from(0x00ff); - assert_eq!(enc.opcode_byte(), 0xff); - assert_eq!(enc.prefix().to_primitive(), 0x0); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - - let enc = EncodingBits::from(0x00cd); - assert_eq!(enc.opcode_byte(), 0xcd); - } - - /// Tests that the OpcodePrefix is encoded correctly. - #[test] - fn encodingbits_prefix() { - let enc = EncodingBits::from(0x0c00); - assert_eq!(enc.opcode_byte(), 0x00); - assert_eq!(enc.prefix().to_primitive(), 0xc); - assert_eq!(enc.prefix(), OpcodePrefix::Op3_0f_3a); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the PP bits are encoded correctly. 
- #[test] - fn encodingbits_pp() { - let enc = EncodingBits::from(0x0300); - assert_eq!(enc.opcode_byte(), 0x0); - assert_eq!(enc.pp(), 0x3); - assert_eq!(enc.mm(), 0x0); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the MM bits are encoded correctly. - #[test] - fn encodingbits_mm() { - let enc = EncodingBits::from(0x0c00); - assert_eq!(enc.opcode_byte(), 0x0); - assert_eq!(enc.pp(), 0x00); - assert_eq!(enc.mm(), 0x3); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the ModR/M bits are encoded correctly. - #[test] - fn encodingbits_rrr() { - let enc = EncodingBits::from(0x5000); - assert_eq!(enc.opcode_byte(), 0x0); - assert_eq!(enc.prefix().to_primitive(), 0x0); - assert_eq!(enc.rrr(), 0x5); - assert_eq!(enc.rex_w(), 0x0); - } - - /// Tests that the REX.W bit is encoded correctly. - #[test] - fn encodingbits_rex_w() { - let enc = EncodingBits::from(0x8000); - assert_eq!(enc.opcode_byte(), 0x00); - assert_eq!(enc.prefix().to_primitive(), 0x0); - assert_eq!(enc.rrr(), 0x0); - assert_eq!(enc.rex_w(), 0x1); - } - - /// Tests setting and unsetting a bit using EncodingBits::write. - #[test] - fn encodingbits_flip() { - let mut bits = EncodingBits::from(0); - let range = 2..=2; - - bits.write(range.clone(), 1); - assert_eq!(bits.bits(), 0b100); - - bits.write(range, 0); - assert_eq!(bits.bits(), 0b000); - } - - /// Tests a round-trip of EncodingBits from/to a u16 (hardcoded endianness). - #[test] - fn encodingbits_roundtrip() { - let bits: u16 = 0x1234; - assert_eq!(EncodingBits::from(bits).bits(), bits); - } - - #[test] - // I purposely want to divide the bits using the ranges defined above. 
- #[allow(clippy::inconsistent_digit_grouping)] - fn encodingbits_construction() { - assert_eq!( - EncodingBits::new(&[0x66, 0x40], 5, 1).bits(), - 0b1_101_0001_01000000 // 1 = rex_w, 101 = rrr, 0001 = prefix, 01000000 = opcode - ); - } - - #[test] - #[should_panic] - fn encodingbits_panics_at_write_to_invalid_range() { - EncodingBits::from(0).write(1..=0, 42); - } - - #[test] - #[should_panic] - fn encodingbits_panics_at_read_to_invalid_range() { - EncodingBits::from(0).read(1..=0); - } -} diff --git a/cranelift/codegen/shared/src/isa/x86/mod.rs b/cranelift/codegen/shared/src/isa/x86/mod.rs deleted file mode 100644 index fb45ae56c3..0000000000 --- a/cranelift/codegen/shared/src/isa/x86/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -//! Shared x86-specific definitions. - -mod encoding_bits; -pub use encoding_bits::*; diff --git a/cranelift/codegen/shared/src/lib.rs b/cranelift/codegen/shared/src/lib.rs index 9b4cb941ed..c031ee7440 100644 --- a/cranelift/codegen/shared/src/lib.rs +++ b/cranelift/codegen/shared/src/lib.rs @@ -22,7 +22,6 @@ pub mod condcodes; pub mod constant_hash; pub mod constants; -pub mod isa; /// Version number of this crate. 
pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 6a4e18cbe3..18004b5c03 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -3565,45 +3565,6 @@ pub(crate) fn lower_insn_to_regs>( panic!("ALU+imm and ALU+carry ops should not appear here!"); } - #[cfg(feature = "x86")] - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pblendw - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Palignr - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Vcvtudq2ps - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => { - panic!("x86-specific opcode in supposedly arch-neutral IR!"); - } - Opcode::DummySargT => unreachable!(), Opcode::Iabs => { diff --git a/cranelift/codegen/src/isa/legacy/mod.rs b/cranelift/codegen/src/isa/legacy/mod.rs index a89230f941..15900b9509 100644 --- a/cranelift/codegen/src/isa/legacy/mod.rs +++ b/cranelift/codegen/src/isa/legacy/mod.rs @@ -1,12 +1,4 @@ //! Legacy ("old-style") backends that will be removed in the future. -// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both -// included whenever building with x86 support. The new backend is the default, -// but the old can be requested with `BackendVariant::Legacy`. 
However, if this -// crate is built with the `old-x86-backend` feature, then the old backend is -// default instead. -#[cfg(feature = "x86")] -pub(crate) mod x86; - #[cfg(feature = "riscv")] pub(crate) mod riscv; diff --git a/cranelift/codegen/src/isa/legacy/x86/abi.rs b/cranelift/codegen/src/isa/legacy/x86/abi.rs deleted file mode 100644 index 934cfec4dd..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/abi.rs +++ /dev/null @@ -1,1102 +0,0 @@ -//! x86 ABI implementation. - -use super::super::super::settings as shared_settings; -use super::registers::{FPR, GPR, RU}; -use super::settings as isa_settings; -use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; -use crate::cursor::{Cursor, CursorPosition, EncCursor}; -use crate::ir; -use crate::ir::immediates::Imm64; -use crate::ir::stackslot::{StackOffset, StackSize}; -use crate::ir::types; -use crate::ir::{ - get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder, - ValueLoc, -}; -use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa}; -use crate::regalloc::RegisterSet; -use crate::result::CodegenResult; -use crate::stack_layout::layout_stack; -use alloc::borrow::Cow; -use core::i32; -use target_lexicon::{PointerWidth, Triple}; - -/// Argument registers for x86-64 -static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9]; - -/// Return value registers. -static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx]; - -/// Argument registers for x86-64, when using windows fastcall -static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9]; - -/// Return value registers for x86-64, when using windows fastcall -static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax]; - -/// The win64 fastcall ABI uses some shadow stack space, allocated by the caller, that can be used -/// by the callee for temporary values. 
-/// -/// [1] "Space is allocated on the call stack as a shadow store for callees to save" This shadow -/// store contains the parameters which are passed through registers (ARG_GPRS) and is eventually -/// used by the callee to save & restore the values of the arguments. -/// -/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling -/// convention reserves spill space for parameters, you don’t have to use them as such" -const WIN_SHADOW_STACK_SPACE: StackSize = 32; - -/// Stack alignment requirement for functions. -/// -/// 16 bytes is the perfect stack alignment, because: -/// -/// - On Win64, "The primary exceptions are the stack pointer and malloc or alloca memory, which -/// are aligned to 16 bytes in order to aid performance". -/// - The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but newer versions use a -/// 16-byte aligned stack pointer. -/// - This allows using aligned loads and stores on SIMD vectors of 16 bytes that are located -/// higher up in the stack. 
-const STACK_ALIGNMENT: u32 = 16; - -#[derive(Clone)] -struct Args { - pointer_bytes: u8, - pointer_bits: u8, - pointer_type: ir::Type, - gpr: &'static [RU], - gpr_used: usize, - fpr_limit: usize, - fpr_used: usize, - offset: u32, - call_conv: CallConv, - shared_flags: shared_settings::Flags, - #[allow(dead_code)] - isa_flags: isa_settings::Flags, - assigning_returns: bool, -} - -impl Args { - fn new( - bits: u8, - gpr: &'static [RU], - fpr_limit: usize, - call_conv: CallConv, - shared_flags: &shared_settings::Flags, - isa_flags: &isa_settings::Flags, - assigning_returns: bool, - ) -> Self { - let offset = if call_conv.extends_windows_fastcall() { - WIN_SHADOW_STACK_SPACE - } else { - 0 - }; - - Self { - pointer_bytes: bits / 8, - pointer_bits: bits, - pointer_type: ir::Type::int(u16::from(bits)).unwrap(), - gpr, - gpr_used: 0, - fpr_limit, - fpr_used: 0, - offset, - call_conv, - shared_flags: shared_flags.clone(), - isa_flags: isa_flags.clone(), - assigning_returns, - } - } -} - -impl ArgAssigner for Args { - fn assign(&mut self, arg: &AbiParam) -> ArgAction { - if let ArgumentPurpose::StructArgument(size) = arg.purpose { - if self.call_conv != CallConv::SystemV { - panic!( - "The sarg argument purpose is not yet implemented for non-systemv call conv {:?}", - self.call_conv, - ); - } - let loc = ArgumentLoc::Stack(self.offset as i32); - self.offset += size; - debug_assert!(self.offset <= i32::MAX as u32); - return ArgAction::AssignAndChangeType(loc, types::SARG_T); - } - - let ty = arg.value_type; - - if ty.bits() > u16::from(self.pointer_bits) { - if !self.assigning_returns && self.call_conv.extends_windows_fastcall() { - // "Any argument that doesn't fit in 8 bytes, or isn't - // 1, 2, 4, or 8 bytes, must be passed by reference" - return ValueConversion::Pointer(self.pointer_type).into(); - } else if !ty.is_vector() && !ty.is_float() { - // On SystemV large integers and booleans are broken down to fit in a register. 
- return ValueConversion::IntSplit.into(); - } - } - - // Vectors should stay in vector registers unless SIMD is not enabled--then they are split - if ty.is_vector() { - if self.shared_flags.enable_simd() { - let reg = FPR.unit(self.fpr_used); - self.fpr_used += 1; - return ArgumentLoc::Reg(reg).into(); - } - return ValueConversion::VectorSplit.into(); - } - - // Small integers are extended to the size of a pointer register, but - // only in ABIs that require this. The Baldrdash (SpiderMonkey) ABI - // does, but our other supported ABIs on x86 do not. - if ty.is_int() - && ty.bits() < u16::from(self.pointer_bits) - && self.call_conv.extends_baldrdash() - { - match arg.extension { - ArgumentExtension::None => {} - ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(), - ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(), - } - } - - // Handle special-purpose arguments. - if ty.is_int() && self.call_conv.extends_baldrdash() { - match arg.purpose { - // This is SpiderMonkey's `WasmTlsReg`. - ArgumentPurpose::VMContext => { - return ArgumentLoc::Reg(if self.pointer_bits == 64 { - RU::r14 - } else { - RU::rsi - } as RegUnit) - .into(); - } - // This is SpiderMonkey's `WasmTableCallSigReg`. - ArgumentPurpose::SignatureId => { - return ArgumentLoc::Reg(if self.pointer_bits == 64 { - RU::r10 - } else { - RU::rcx - } as RegUnit) - .into() - } - _ => {} - } - } - - // Try to use a GPR. - if !ty.is_float() && self.gpr_used < self.gpr.len() { - let reg = self.gpr[self.gpr_used] as RegUnit; - self.gpr_used += 1; - return ArgumentLoc::Reg(reg).into(); - } - - // Try to use an FPR. - let fpr_offset = if self.call_conv.extends_windows_fastcall() { - // Float and general registers on windows share the same parameter index. - // The used register depends entirely on the parameter index: Even if XMM0 - // is not used for the first parameter, it cannot be used for the second parameter. 
- debug_assert_eq!(self.fpr_limit, self.gpr.len()); - &mut self.gpr_used - } else { - &mut self.fpr_used - }; - - if ty.is_float() && *fpr_offset < self.fpr_limit { - let reg = FPR.unit(*fpr_offset); - *fpr_offset += 1; - return ArgumentLoc::Reg(reg).into(); - } - - // Assign a stack location. - let loc = ArgumentLoc::Stack(self.offset as i32); - self.offset += u32::from(self.pointer_bytes); - debug_assert!(self.offset <= i32::MAX as u32); - loc.into() - } -} - -/// Legalize `sig`. -pub fn legalize_signature( - sig: &mut Cow, - triple: &Triple, - _current: bool, - shared_flags: &shared_settings::Flags, - isa_flags: &isa_settings::Flags, -) { - let bits; - let mut args; - - match triple.pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => { - bits = 32; - args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags, false); - } - PointerWidth::U64 => { - bits = 64; - args = if sig.call_conv.extends_windows_fastcall() { - Args::new( - bits, - &ARG_GPRS_WIN_FASTCALL_X64[..], - 4, - sig.call_conv, - shared_flags, - isa_flags, - false, - ) - } else { - Args::new( - bits, - &ARG_GPRS[..], - 8, - sig.call_conv, - shared_flags, - isa_flags, - false, - ) - }; - } - } - - let (ret_regs, ret_fpr_limit) = if sig.call_conv.extends_windows_fastcall() { - // windows-x64 calling convention only uses XMM0 or RAX for return values - (&RET_GPRS_WIN_FASTCALL_X64[..], 1) - } else { - (&RET_GPRS[..], 2) - }; - - let mut rets = Args::new( - bits, - ret_regs, - ret_fpr_limit, - sig.call_conv, - shared_flags, - isa_flags, - true, - ); - - // If we don't have enough available return registers - // to fit all of the return values, we need to backtrack and start - // assigning locations all over again with a different strategy. In order to - // do that, we need a copy of the original assigner for the returns. 
- let mut backup_rets = rets.clone(); - - if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) { - if new_returns - .iter() - .filter(|r| r.purpose == ArgumentPurpose::Normal) - .any(|r| !r.location.is_reg()) - { - // The return values couldn't all fit into available return - // registers. Introduce the use of a struct-return parameter. - debug_assert!(!sig.uses_struct_return_param()); - - // We're using the first register for the return pointer parameter. - let mut ret_ptr_param = AbiParam { - value_type: args.pointer_type, - purpose: ArgumentPurpose::StructReturn, - extension: ArgumentExtension::None, - location: ArgumentLoc::Unassigned, - legalized_to_pointer: false, - }; - match args.assign(&ret_ptr_param) { - ArgAction::Assign(ArgumentLoc::Reg(reg)) => { - ret_ptr_param.location = ArgumentLoc::Reg(reg); - sig.to_mut().params.push(ret_ptr_param); - } - _ => unreachable!("return pointer should always get a register assignment"), - } - - // We're using the first return register for the return pointer (like - // sys v does). - let mut ret_ptr_return = AbiParam { - value_type: args.pointer_type, - purpose: ArgumentPurpose::StructReturn, - extension: ArgumentExtension::None, - location: ArgumentLoc::Unassigned, - legalized_to_pointer: false, - }; - match backup_rets.assign(&ret_ptr_return) { - ArgAction::Assign(ArgumentLoc::Reg(reg)) => { - ret_ptr_return.location = ArgumentLoc::Reg(reg); - sig.to_mut().returns.push(ret_ptr_return); - } - _ => unreachable!("return pointer should always get a register assignment"), - } - - sig.to_mut().returns.retain(|ret| { - // Either this is the return pointer, in which case we want to keep - // it, or else assume that it is assigned for a reason and doesn't - // conflict with our return pointering legalization. 
- debug_assert_eq!( - ret.location.is_assigned(), - ret.purpose != ArgumentPurpose::Normal - ); - ret.location.is_assigned() - }); - - if let Some(new_returns) = legalize_args(&sig.returns, &mut backup_rets) { - sig.to_mut().returns = new_returns; - } - } else { - sig.to_mut().returns = new_returns; - } - } - - if let Some(new_params) = legalize_args(&sig.params, &mut args) { - sig.to_mut().params = new_params; - } -} - -/// Get register class for a type appearing in a legalized signature. -pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass { - if ty.is_int() || ty.is_bool() || ty.is_ref() { - GPR - } else { - FPR - } -} - -/// Get the set of allocatable registers for `func`. -pub fn allocatable_registers(triple: &Triple, flags: &shared_settings::Flags) -> RegisterSet { - let mut regs = RegisterSet::new(); - regs.take(GPR, RU::rsp as RegUnit); - regs.take(GPR, RU::rbp as RegUnit); - - // 32-bit arch only has 8 registers. - if triple.pointer_width().unwrap() != PointerWidth::U64 { - for i in 8..16 { - regs.take(GPR, GPR.unit(i)); - regs.take(FPR, FPR.unit(i)); - } - if flags.enable_pinned_reg() { - unimplemented!("Pinned register not implemented on x86-32."); - } - } else { - // Choose r15 as the pinned register on 64-bits: it is non-volatile on native ABIs and - // isn't the fixed output register of any instruction. - if flags.enable_pinned_reg() { - regs.take(GPR, RU::r15 as RegUnit); - } - } - - regs -} - -/// Get the set of callee-saved general-purpose registers. -fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] { - match isa.triple().pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi], - PointerWidth::U64 => { - if call_conv.extends_windows_fastcall() { - // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15, and XMM6-15 are - // considered nonvolatile and must be saved and restored by a function that uses - // them." 
- // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention - // RSP & RBP are not listed below, since they are restored automatically during - // a function call. If that wasn't the case, function calls (RET) would not work. - &[ - RU::rbx, - RU::rdi, - RU::rsi, - RU::r12, - RU::r13, - RU::r14, - RU::r15, - ] - } else { - &[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15] - } - } - } -} - -/// Get the set of callee-saved floating-point (SIMD) registers. -fn callee_saved_fprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] { - match isa.triple().pointer_width().unwrap() { - PointerWidth::U16 => panic!(), - PointerWidth::U32 => &[], - PointerWidth::U64 => { - if call_conv.extends_windows_fastcall() { - // "registers RBX, ... , and XMM6-15 are considered nonvolatile and must be saved - // and restored by a function that uses them." - // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention as of - // February 5th, 2020. - &[ - RU::xmm6, - RU::xmm7, - RU::xmm8, - RU::xmm9, - RU::xmm10, - RU::xmm11, - RU::xmm12, - RU::xmm13, - RU::xmm14, - RU::xmm15, - ] - } else { - &[] - } - } - } -} - -/// Get the set of callee-saved registers that are used. -fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet { - let mut all_callee_saved = RegisterSet::empty(); - for reg in callee_saved_gprs(isa, func.signature.call_conv) { - all_callee_saved.free(GPR, *reg as RegUnit); - } - for reg in callee_saved_fprs(isa, func.signature.call_conv) { - all_callee_saved.free(FPR, *reg as RegUnit); - } - - let mut used = RegisterSet::empty(); - for value_loc in func.locations.values() { - // Note that `value_loc` here contains only a single unit of a potentially multi-unit - // register. We don't use registers that overlap each other in the x86 ISA, but in others - // we do. So this should not be blindly reused. 
- if let ValueLoc::Reg(ru) = *value_loc { - if GPR.contains(ru) { - if !used.is_avail(GPR, ru) { - used.free(GPR, ru); - } - } else if FPR.contains(ru) { - if !used.is_avail(FPR, ru) { - used.free(FPR, ru); - } - } - } - } - - // regmove and regfill instructions may temporarily divert values into other registers, - // and these are not reflected in `func.locations`. Scan the function for such instructions - // and note which callee-saved registers they use. - // - // TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible - // to avoid this step. - for block in &func.layout { - for inst in func.layout.block_insts(block) { - match func.dfg[inst] { - ir::instructions::InstructionData::RegMove { dst, .. } - | ir::instructions::InstructionData::RegFill { dst, .. } => { - if GPR.contains(dst) { - if !used.is_avail(GPR, dst) { - used.free(GPR, dst); - } - } else if FPR.contains(dst) { - if !used.is_avail(FPR, dst) { - used.free(FPR, dst); - } - } - } - _ => (), - } - } - } - - used.intersect(&all_callee_saved); - used -} - -pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - match func.signature.call_conv { - // For now, just translate fast and cold as system_v. 
- CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => { - system_v_prologue_epilogue(func, isa) - } - CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => { - fastcall_prologue_epilogue(func, isa) - } - CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => { - baldrdash_prologue_epilogue(func, isa) - } - CallConv::Probestack => unimplemented!("probestack calling convention"), - CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"), - CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(), - } -} - -fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - debug_assert!( - !isa.flags().enable_probestack(), - "baldrdash does not expect cranelift to emit stack probes" - ); - - let word_size = StackSize::from(isa.pointer_bytes()); - let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() { - WIN_SHADOW_STACK_SPACE - } else { - 0 - }; - - let bytes = - StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size + shadow_store_size; - - let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); - ss.offset = Some(-(bytes as StackOffset)); - func.stack_slots.push(ss); - - let is_leaf = func.is_leaf(); - layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)?; - Ok(()) -} - -/// Implementation of the fastcall-based Win64 calling convention described at [1] -/// [1] https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention -fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - if isa.triple().pointer_width().unwrap() != PointerWidth::U64 { - panic!("TODO: windows-fastcall: x86-32 not implemented yet"); - } - - // The reserved stack area is composed of: - // return address + frame pointer + all callee-saved registers - // - // Pushing the return address is an implicit function of the `call` - // instruction. 
Each of the others we will then push explicitly. Then we - // will adjust the stack pointer to make room for the rest of the required - // space for this frame. - let csrs = callee_saved_regs_used(isa, func); - let gpsr_stack_size = ((csrs.iter(GPR).len() + 2) * isa.pointer_bytes() as usize) as u32; - let fpsr_stack_size = (csrs.iter(FPR).len() * types::F64X2.bytes() as usize) as u32; - let mut csr_stack_size = gpsr_stack_size + fpsr_stack_size; - - // FPRs must be saved with 16-byte alignment; because they follow the GPRs on the stack, align if needed - if fpsr_stack_size > 0 { - csr_stack_size = (csr_stack_size + 15) & !15; - } - - func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::IncomingArg, - size: csr_stack_size, - offset: Some(-(csr_stack_size as StackOffset)), - }); - - let is_leaf = func.is_leaf(); - - // If not a leaf function, allocate an explicit stack slot at the end of the space for the callee's shadow space - if !is_leaf { - // TODO: eventually use the caller-provided shadow store as spill slot space when laying out the stack - func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::ExplicitSlot, - size: WIN_SHADOW_STACK_SPACE, - offset: None, - }); - } - - let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? 
as i32; - - // Subtract the GPR saved register size from the local size because pushes are used for the saves - let local_stack_size = i64::from(total_stack_size - gpsr_stack_size as i32); - - // Add CSRs to function signature - let reg_type = isa.pointer_type(); - let sp_arg_index = if fpsr_stack_size > 0 { - let sp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::CalleeSaved, - RU::rsp as RegUnit, - ); - let index = func.signature.params.len(); - func.signature.params.push(sp_arg); - Some(index) - } else { - None - }; - let fp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::FramePointer, - RU::rbp as RegUnit, - ); - func.signature.params.push(fp_arg); - func.signature.returns.push(fp_arg); - - for gp_csr in csrs.iter(GPR) { - let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, gp_csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - for fp_csr in csrs.iter(FPR) { - // The calling convention described in - // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires - // preserving the low 128 bits of XMM6-XMM15. - let csr_arg = - ir::AbiParam::special_reg(types::F64X2, ir::ArgumentPurpose::CalleeSaved, fp_csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - // Set up the cursor and insert the prologue - let entry_block = func.layout.entry_block().expect("missing entry block"); - let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block); - insert_common_prologue( - &mut pos, - local_stack_size, - reg_type, - &csrs, - sp_arg_index.is_some(), - isa, - ); - - // Reset the cursor and insert the epilogue - let mut pos = pos.at_position(CursorPosition::Nowhere); - insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index); - - Ok(()) -} - -/// Insert a System V-compatible prologue and epilogue. 
-fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> { - let pointer_width = isa.triple().pointer_width().unwrap(); - let word_size = pointer_width.bytes() as usize; - - let csrs = callee_saved_regs_used(isa, func); - assert!( - csrs.iter(FPR).len() == 0, - "SysV ABI does not have callee-save SIMD registers" - ); - - // The reserved stack area is composed of: - // return address + frame pointer + all callee-saved registers - // - // Pushing the return address is an implicit function of the `call` - // instruction. Each of the others we will then push explicitly. Then we - // will adjust the stack pointer to make room for the rest of the required - // space for this frame. - let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32; - func.create_stack_slot(ir::StackSlotData { - kind: ir::StackSlotKind::IncomingArg, - size: csr_stack_size as u32, - offset: Some(-csr_stack_size), - }); - - let is_leaf = func.is_leaf(); - let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32; - let local_stack_size = i64::from(total_stack_size - csr_stack_size); - - // Add CSRs to function signature - let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap(); - // On X86-32 all parameters, including vmctx, are passed on stack, and we need - // to extract vmctx from the stack before we can save the frame pointer. 
- let sp_arg_index = if isa.pointer_bits() == 32 { - let sp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::CalleeSaved, - RU::rsp as RegUnit, - ); - let index = func.signature.params.len(); - func.signature.params.push(sp_arg); - Some(index) - } else { - None - }; - let fp_arg = ir::AbiParam::special_reg( - reg_type, - ir::ArgumentPurpose::FramePointer, - RU::rbp as RegUnit, - ); - func.signature.params.push(fp_arg); - func.signature.returns.push(fp_arg); - - for csr in csrs.iter(GPR) { - let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr); - func.signature.params.push(csr_arg); - func.signature.returns.push(csr_arg); - } - - // Set up the cursor and insert the prologue - let entry_block = func.layout.entry_block().expect("missing entry block"); - let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block); - insert_common_prologue( - &mut pos, - local_stack_size, - reg_type, - &csrs, - sp_arg_index.is_some(), - isa, - ); - - // Reset the cursor and insert the epilogue - let mut pos = pos.at_position(CursorPosition::Nowhere); - insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index); - - Ok(()) -} - -/// Insert the prologue for a given function. -/// This is used by common calling conventions such as System V. -fn insert_common_prologue( - pos: &mut EncCursor, - stack_size: i64, - reg_type: ir::types::Type, - csrs: &RegisterSet, - has_sp_param: bool, - isa: &dyn TargetIsa, -) { - let sp = if has_sp_param { - let block = pos.current_block().expect("missing block under cursor"); - let sp = pos.func.dfg.append_block_param(block, reg_type); - pos.func.locations[sp] = ir::ValueLoc::Reg(RU::rsp as RegUnit); - Some(sp) - } else { - None - }; - - // If this is a leaf function with zero stack, then there's no need to - // insert a stack check since it can't overflow anything and - // forward-progress is guarantee so long as loop are handled anyway. 
- // - // If this has a stack size it could stack overflow, or if it isn't a leaf - // it could be part of a long call chain which we need to check anyway. - // - // First we look for the stack limit as a special argument to the function, - // and failing that we see if a custom stack limit factory has been provided - // which will be used to likely calculate the stack limit from the arguments - // or perhaps constants. - if stack_size > 0 || !pos.func.is_leaf() { - let scratch = ir::ValueLoc::Reg(RU::rax as RegUnit); - let stack_limit_arg = match pos.func.special_param(ArgumentPurpose::StackLimit) { - Some(arg) => { - let copy = pos.ins().copy(arg); - pos.func.locations[copy] = scratch; - Some(copy) - } - None => pos - .func - .stack_limit - .map(|gv| interpret_gv(pos, gv, sp, scratch)), - }; - if let Some(stack_limit_arg) = stack_limit_arg { - insert_stack_check(pos, stack_size, stack_limit_arg); - } - } - - // Append param to entry block - let block = pos.current_block().expect("missing block under cursor"); - let fp = pos.func.dfg.append_block_param(block, reg_type); - pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit); - - pos.ins().x86_push(fp); - - let mov_sp_inst = pos - .ins() - .copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit); - - let mut last_csr_push = None; - for reg in csrs.iter(GPR) { - // Append param to entry block - let csr_arg = pos.func.dfg.append_block_param(block, reg_type); - - // Assign it a location - pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); - last_csr_push = Some(pos.ins().x86_push(csr_arg)); - } - - // Allocate stack frame storage. - let mut adjust_sp_inst = None; - if stack_size > 0 { - if isa.flags().enable_probestack() && stack_size > (1 << isa.flags().probestack_size_log2()) - { - // Emit a stack probe. - let rax = RU::rax as RegUnit; - let rax_val = ir::ValueLoc::Reg(rax); - - // The probestack function expects its input in %rax. 
- let arg = pos.ins().iconst(reg_type, stack_size); - pos.func.locations[arg] = rax_val; - - // Call the probestack function. - let callee = get_probestack_funcref(pos.func, reg_type, rax, isa); - - // Make the call. - let call = if !isa.flags().is_pic() - && isa.triple().pointer_width().unwrap() == PointerWidth::U64 - && !pos.func.dfg.ext_funcs[callee].colocated - { - // 64-bit non-PIC non-colocated calls need to be legalized to call_indirect. - // Use r11 as it may be clobbered under all supported calling conventions. - let r11 = RU::r11 as RegUnit; - let sig = pos.func.dfg.ext_funcs[callee].signature; - let addr = pos.ins().func_addr(reg_type, callee); - pos.func.locations[addr] = ir::ValueLoc::Reg(r11); - pos.ins().call_indirect(sig, addr, &[arg]) - } else { - // Otherwise just do a normal call. - pos.ins().call(callee, &[arg]) - }; - - // If the probestack function doesn't adjust sp, do it ourselves. - if !isa.flags().probestack_func_adjusts_sp() { - let result = pos.func.dfg.inst_results(call)[0]; - pos.func.locations[result] = rax_val; - adjust_sp_inst = Some(pos.ins().adjust_sp_down(result)); - } - } else { - // Simply decrement the stack pointer. - adjust_sp_inst = Some(pos.ins().adjust_sp_down_imm(Imm64::new(stack_size))); - } - } - - // With the stack pointer adjusted, save any callee-saved floating point registers via offset - // FPR saves are at the highest addresses of the local frame allocation, immediately following the GPR pushes - let mut last_fpr_save = None; - - for (i, reg) in csrs.iter(FPR).enumerate() { - // Append param to entry block - let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2); - - // Since regalloc has already run, we must assign a location. 
- pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg); - - // Offset to where the register is saved relative to RSP, accounting for FPR save alignment - let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64 - + (stack_size % types::F64X2.bytes() as i64); - - last_fpr_save = Some(pos.ins().store( - ir::MemFlags::trusted(), - csr_arg, - sp.expect("FPR save requires SP param"), - (stack_size - offset) as i32, - )); - } - - pos.func.prologue_end = Some( - last_fpr_save - .or(adjust_sp_inst) - .or(last_csr_push) - .unwrap_or(mov_sp_inst), - ); -} - -/// Inserts code necessary to calculate `gv`. -/// -/// Note that this is typically done with `ins().global_value(...)` but that -/// requires legalization to run to encode it, and we're running super late -/// here in the backend where legalization isn't possible. To get around this -/// we manually interpret the `gv` specified and do register allocation for -/// intermediate values. -/// -/// This is an incomplete implementation of loading `GlobalValue` values to get -/// compared to the stack pointer, but currently it serves enough functionality -/// to get this implemented in `wasmtime` itself. This'll likely get expanded a -/// bit over time! -fn interpret_gv( - pos: &mut EncCursor, - gv: ir::GlobalValue, - sp: Option, - scratch: ir::ValueLoc, -) -> ir::Value { - match pos.func.global_values[gv] { - ir::GlobalValueData::VMContext => { - let vmctx_index = pos - .func - .signature - .special_param_index(ir::ArgumentPurpose::VMContext) - .expect("no vmcontext parameter found"); - match pos.func.signature.params[vmctx_index] { - AbiParam { - location: ArgumentLoc::Reg(_), - .. - } => { - let entry = pos.func.layout.entry_block().unwrap(); - pos.func.dfg.block_params(entry)[vmctx_index] - } - AbiParam { - location: ArgumentLoc::Stack(offset), - value_type, - .. 
- } => { - let offset = - offset + i32::from(pos.isa.pointer_bytes() * (1 + vmctx_index as u8)); - // The following access can be marked `trusted` because it is a load of an argument. We - // know it is safe because it was safe to write it in preparing this function call. - let ret = - pos.ins() - .load(value_type, ir::MemFlags::trusted(), sp.unwrap(), offset); - pos.func.locations[ret] = scratch; - return ret; - } - AbiParam { - location: ArgumentLoc::Unassigned, - .. - } => unreachable!(), - } - } - ir::GlobalValueData::Load { - base, - offset, - global_type, - readonly: _, - } => { - let base = interpret_gv(pos, base, sp, scratch); - let ret = pos - .ins() - .load(global_type, ir::MemFlags::trusted(), base, offset); - pos.func.locations[ret] = scratch; - return ret; - } - ref other => panic!("global value for stack limit not supported: {}", other), - } -} - -/// Insert a check that generates a trap if the stack pointer goes -/// below a value in `stack_limit_arg`. -fn insert_stack_check(pos: &mut EncCursor, stack_size: i64, stack_limit_arg: ir::Value) { - use crate::ir::condcodes::IntCC; - - // Our stack pointer, after subtracting `stack_size`, must not be below - // `stack_limit_arg`. To do this we're going to add `stack_size` to - // `stack_limit_arg` and see if the stack pointer is below that. The - // `stack_size + stack_limit_arg` computation might overflow, however, due - // to how stack limits may be loaded and set externally to trigger a trap. - // - // To handle this we'll need an extra comparison to see if the stack - // pointer is already below `stack_limit_arg`. Most of the time this - // isn't necessary though since the stack limit which triggers a trap is - // likely a sentinel somewhere around `usize::max_value()`. In that case - // only conditionally emit this pre-flight check. That way most functions - // only have the one comparison, but are also guaranteed that if we add - // `stack_size` to `stack_limit_arg` is won't overflow. 
- // - // This does mean that code generators which use this stack check - // functionality need to ensure that values stored into the stack limit - // will never overflow if this threshold is added. - if stack_size >= 32 * 1024 { - let cflags = pos.ins().ifcmp_sp(stack_limit_arg); - pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit); - pos.ins().trapif( - IntCC::UnsignedGreaterThanOrEqual, - cflags, - ir::TrapCode::StackOverflow, - ); - } - - // Copy `stack_limit_arg` into a %rax and use it for calculating - // a SP threshold. - let sp_threshold = pos.ins().iadd_imm(stack_limit_arg, stack_size); - pos.func.locations[sp_threshold] = ir::ValueLoc::Reg(RU::rax as RegUnit); - - // If the stack pointer currently reaches the SP threshold or below it then after opening - // the current stack frame, the current stack pointer will reach the limit. - let cflags = pos.ins().ifcmp_sp(sp_threshold); - pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit); - pos.ins().trapif( - IntCC::UnsignedGreaterThanOrEqual, - cflags, - ir::TrapCode::StackOverflow, - ); -} - -/// Find all `return` instructions and insert epilogues before them. -fn insert_common_epilogues( - pos: &mut EncCursor, - stack_size: i64, - reg_type: ir::types::Type, - csrs: &RegisterSet, - sp_arg_index: Option, -) { - while let Some(block) = pos.next_block() { - pos.goto_last_inst(block); - if let Some(inst) = pos.current_inst() { - if pos.func.dfg[inst].opcode().is_return() { - insert_common_epilogue(inst, block, stack_size, pos, reg_type, csrs, sp_arg_index); - } - } - } -} - -/// Insert an epilogue given a specific `return` instruction. -/// This is used by common calling conventions such as System V. 
-fn insert_common_epilogue( - inst: ir::Inst, - block: ir::Block, - stack_size: i64, - pos: &mut EncCursor, - reg_type: ir::types::Type, - csrs: &RegisterSet, - sp_arg_index: Option, -) { - // Insert the pop of the frame pointer - let fp_pop = pos.ins().x86_pop(reg_type); - let fp_pop_inst = pos.prev_inst().unwrap(); - pos.func.locations[fp_pop] = ir::ValueLoc::Reg(RU::rbp as RegUnit); - pos.func.dfg.append_inst_arg(inst, fp_pop); - - // Insert the CSR pops - let mut first_csr_pop_inst = None; - for reg in csrs.iter(GPR) { - let csr_pop = pos.ins().x86_pop(reg_type); - first_csr_pop_inst = pos.prev_inst(); - assert!(first_csr_pop_inst.is_some()); - pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg); - pos.func.dfg.append_inst_arg(inst, csr_pop); - } - - // Insert the adjustment of SP - let mut sp_adjust_inst = None; - if stack_size > 0 { - pos.ins().adjust_sp_up_imm(Imm64::new(stack_size)); - sp_adjust_inst = pos.prev_inst(); - assert!(sp_adjust_inst.is_some()); - } - - let mut first_fpr_load = None; - if let Some(index) = sp_arg_index { - let sp = pos - .func - .dfg - .block_params(pos.func.layout.entry_block().unwrap())[index]; - - // Insert the FPR loads (unlike the GPRs, which are stack pops, these are in-order loads) - for (i, reg) in csrs.iter(FPR).enumerate() { - // Offset to where the register is saved relative to RSP, accounting for FPR save alignment - let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64 - + (stack_size % types::F64X2.bytes() as i64); - - let value = pos.ins().load( - types::F64X2, - ir::MemFlags::trusted(), - sp, - (stack_size - offset) as i32, - ); - - first_fpr_load.get_or_insert(pos.current_inst().expect("current inst")); - - pos.func.locations[value] = ir::ValueLoc::Reg(reg); - pos.func.dfg.append_inst_arg(inst, value); - } - } else { - assert!(csrs.iter(FPR).len() == 0); - } - - pos.func.epilogues_start.push(( - first_fpr_load - .or(sp_adjust_inst) - .or(first_csr_pop_inst) - .unwrap_or(fp_pop_inst), - block, - )); -} - 
-#[cfg(feature = "unwind")] -pub fn create_unwind_info( - func: &ir::Function, - isa: &dyn TargetIsa, -) -> CodegenResult> { - use crate::isa::unwind::UnwindInfo; - use crate::machinst::UnwindInfoKind; - - // Assumption: RBP is being used as the frame pointer for both calling conventions - // In the future, we should be omitting frame pointer as an optimization, so this will change - Ok(match isa.unwind_info_kind() { - UnwindInfoKind::SystemV => { - super::unwind::systemv::create_unwind_info(func, isa)?.map(|u| UnwindInfo::SystemV(u)) - } - UnwindInfoKind::Windows => { - super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u)) - } - UnwindInfoKind::None => None, - }) -} diff --git a/cranelift/codegen/src/isa/legacy/x86/binemit.rs b/cranelift/codegen/src/isa/legacy/x86/binemit.rs deleted file mode 100644 index 0480873672..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/binemit.rs +++ /dev/null @@ -1,578 +0,0 @@ -//! Emitting binary x86 machine code. - -use super::enc_tables::{needs_offset, needs_sib_byte}; -use super::registers::RU; -use crate::binemit::{bad_encoding, CodeSink, Reloc}; -use crate::ir::condcodes::{CondCode, FloatCC, IntCC}; -use crate::ir::{ - Block, Constant, ExternalName, Function, Inst, InstructionData, JumpTable, LibCall, Opcode, - TrapCode, -}; -use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa}; -use crate::regalloc::RegDiversions; -use cranelift_codegen_shared::isa::x86::EncodingBits; - -include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs")); - -// Convert a stack base to the corresponding register. -fn stk_base(base: StackBase) -> RegUnit { - let ru = match base { - StackBase::SP => RU::rsp, - StackBase::FP => RU::rbp, - StackBase::Zone => unimplemented!(), - }; - ru as RegUnit -} - -// Mandatory prefix bytes for Mp* opcodes. -const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2]; - -// Second byte for three-byte opcodes for mm=0b10 and mm=0b11. 
-const OP3_BYTE2: [u8; 2] = [0x38, 0x3a]; - -// A REX prefix with no bits set: 0b0100WRXB. -const BASE_REX: u8 = 0b0100_0000; - -// Create a single-register REX prefix, setting the B bit to bit 3 of the register. -// This is used for instructions that encode a register in the low 3 bits of the opcode and for -// instructions that use the ModR/M `reg` field for something else. -fn rex1(reg_b: RegUnit) -> u8 { - let b = ((reg_b >> 3) & 1) as u8; - BASE_REX | b -} - -// Create a dual-register REX prefix, setting: -// -// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present. -// REX.R = bit 3 of reg register. -fn rex2(rm: RegUnit, reg: RegUnit) -> u8 { - let b = ((rm >> 3) & 1) as u8; - let r = ((reg >> 3) & 1) as u8; - BASE_REX | b | (r << 2) -} - -// Create a three-register REX prefix, setting: -// -// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present. -// REX.R = bit 3 of reg register. -// REX.X = bit 3 of SIB index register. -fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 { - let b = ((rm >> 3) & 1) as u8; - let r = ((reg >> 3) & 1) as u8; - let x = ((index >> 3) & 1) as u8; - BASE_REX | b | (x << 1) | (r << 2) -} - -/// Encode the RXBR' bits of the EVEX P0 byte. For an explanation of these bits, see section 2.6.1 -/// in the Intel Software Development Manual, volume 2A. These bits can be used by different -/// addressing modes (see section 2.6.2), requiring different `vex*` functions than this one. -fn evex2(rm: RegUnit, reg: RegUnit) -> u8 { - let b = (!(rm >> 3) & 1) as u8; - let x = (!(rm >> 4) & 1) as u8; - let r = (!(reg >> 3) & 1) as u8; - let r_ = (!(reg >> 4) & 1) as u8; - 0x00 | r_ | (b << 1) | (x << 2) | (r << 3) -} - -/// Determines whether a REX prefix should be emitted. A REX byte always has 0100 in bits 7:4; bits -/// 3:0 correspond to WRXB. 
W allows certain instructions to declare a 64-bit operand size; because -/// [needs_rex] is only used by [infer_rex] and we prevent [infer_rex] from using [w] in -/// [Template::build], we do not need to check again whether [w] forces an inferred REX prefix--it -/// always does and should be encoded like `.rex().w()`. The RXB are extension of ModR/M or SIB -/// fields; see section 2.2.1.2 in the Intel Software Development Manual. -#[inline] -fn needs_rex(rex: u8) -> bool { - rex != BASE_REX -} - -// Emit a REX prefix. -// -// The R, X, and B bits are computed from registers using the functions above. The W bit is -// extracted from `bits`. -fn rex_prefix(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(rex & 0xf8, BASE_REX); - let w = EncodingBits::from(bits).rex_w(); - sink.put1(rex | (w << 3)); -} - -// Emit a single-byte opcode with no REX prefix. -fn put_op1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding"); - sink.put1(bits as u8); -} - -// Emit a single-byte opcode with REX prefix. -fn put_rexop1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*"); - rex_prefix(bits, rex, sink); - sink.put1(bits as u8); -} - -/// Emit a single-byte opcode with inferred REX prefix. -fn put_dynrexop1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for DynRexOp1*"); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(bits as u8); -} - -// Emit two-byte opcode: 0F XX -fn put_op2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding"); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -// Emit two-byte opcode: 0F XX with REX prefix. 
-fn put_rexop2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*"); - rex_prefix(bits, rex, sink); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -/// Emit two-byte opcode: 0F XX with inferred REX prefix. -fn put_dynrexop2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!( - bits & 0x0f00, - 0x0400, - "Invalid encoding bits for DynRexOp2*" - ); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(0x0f); - sink.put1(bits as u8); -} - -// Emit single-byte opcode with mandatory prefix. -fn put_mp1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding"); - sink.put1(bits as u8); -} - -// Emit single-byte opcode with mandatory prefix and REX. -fn put_rexmp1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - rex_prefix(bits, rex, sink); - sink.put1(bits as u8); -} - -// Emit two-byte opcode (0F XX) with mandatory prefix. -fn put_mp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding"); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -// Emit two-byte opcode (0F XX) with mandatory prefix and REX. 
-fn put_rexmp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - rex_prefix(bits, rex, sink); - sink.put1(0x0f); - sink.put1(bits as u8); -} - -/// Emit two-byte opcode (0F XX) with mandatory prefix and inferred REX. -fn put_dynrexmp2(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!( - bits & 0x0c00, - 0x0400, - "Invalid encoding bits for DynRexMp2*" - ); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(0x0f); - sink.put1(bits as u8); -} - -/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix. -fn put_mp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - sink.put1(0x0f); - sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); - sink.put1(bits as u8); -} - -/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX -fn put_rexmp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*"); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - rex_prefix(bits, rex, sink); - sink.put1(0x0f); - sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); - sink.put1(bits as u8); -} - -/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and an inferred REX prefix. 
-fn put_dynrexmp3(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!( - bits & 0x0800, - 0x0800, - "Invalid encoding bits for DynRexMp3*" - ); - let enc = EncodingBits::from(bits); - sink.put1(PREFIX[(enc.pp() - 1) as usize]); - if needs_rex(rex) { - rex_prefix(bits, rex, sink); - } - sink.put1(0x0f); - sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]); - sink.put1(bits as u8); -} - -/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in -/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be -/// used together for certain classes of instructions; i.e., special care should be taken to ensure -/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where -/// opcodes can result in an #UD. -#[allow(dead_code)] -enum EvexContext { - RoundingRegToRegFP { - rc: EvexRoundingControl, - }, - NoRoundingFP { - sae: bool, - length: EvexVectorLength, - }, - MemoryOp { - broadcast: bool, - length: EvexVectorLength, - }, - Other { - length: EvexVectorLength, - }, -} - -impl EvexContext { - /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte. - fn bits(&self) -> u8 { - match self { - Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1, - Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1, - Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1, - Self::Other { length } => length.bits() << 1, - } - } -} - -/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`. -#[allow(dead_code)] -enum EvexVectorLength { - V128, - V256, - V512, -} - -impl EvexVectorLength { - /// Encode the `L'` and `L` bits for merging with the P2 byte. - fn bits(&self) -> u8 { - match self { - Self::V128 => 0b00, - Self::V256 => 0b01, - Self::V512 => 0b10, - // 0b11 is reserved (#UD). 
- } - } -} - -/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`. -#[allow(dead_code)] -enum EvexRoundingControl { - RNE, - RD, - RU, - RZ, -} - -impl EvexRoundingControl { - /// Encode the `L'` and `L` bits for merging with the P2 byte. - fn bits(&self) -> u8 { - match self { - Self::RNE => 0b00, - Self::RD => 0b01, - Self::RU => 0b10, - Self::RZ => 0b11, - } - } -} - -/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel -/// Software Development Manual, volume 2A. -#[allow(dead_code)] -enum EvexMasking { - None, - Merging { k: u8 }, - Zeroing { k: u8 }, -} - -impl EvexMasking { - /// Encode the `z` bit for merging with the P2 byte. - fn z_bit(&self) -> u8 { - match self { - Self::None | Self::Merging { .. } => 0, - Self::Zeroing { .. } => 1, - } - } - - /// Encode the `aaa` bits for merging with the P2 byte. - fn aaa_bits(&self) -> u8 { - match self { - Self::None => 0b000, - Self::Merging { k } | Self::Zeroing { k } => { - debug_assert!(*k <= 7); - *k - } - } - } -} - -/// Encode an EVEX prefix, including the instruction opcode. To match the current recipe -/// convention, the ModR/M byte is written separately in the recipe. This EVEX encoding function -/// only encodes the `reg` (operand 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are -/// possible (see section 2.6.2, Intel Software Development Manual, volume 2A), requiring -/// refactoring of this function or separate functions for each form (e.g. as for the REX prefix). -fn put_evex( - bits: u16, - reg: RegUnit, - vvvvv: RegUnit, - rm: RegUnit, - context: EvexContext, - masking: EvexMasking, - sink: &mut CS, -) { - let enc = EncodingBits::from(bits); - - // EVEX prefix. 
- sink.put1(0x62); - - debug_assert!(enc.mm() < 0b100); - let mut p0 = enc.mm() & 0b11; - p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset - sink.put1(p0); - - let mut p1 = enc.pp() | 0b100; // bit 2 is always set - p1 |= (!(vvvvv as u8) & 0b1111) << 3; - p1 |= (enc.rex_w() & 0b1) << 7; - sink.put1(p1); - - let mut p2 = masking.aaa_bits(); - p2 |= (!(vvvvv as u8 >> 4) & 0b1) << 3; - p2 |= context.bits() << 4; - p2 |= masking.z_bit() << 7; - sink.put1(p2); - - // Opcode - sink.put1(enc.opcode_byte()); - - // ModR/M byte placed in recipe -} - -/// Emit a ModR/M byte for reg-reg operands. -fn modrm_rr(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b11000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a ModR/M byte where the reg bits are part of the opcode. -fn modrm_r_bits(rm: RegUnit, bits: u16, sink: &mut CS) { - let reg = (bits >> 12) as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b11000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset. -/// Registers %rsp and %rbp are invalid for `rm`, %rsp indicates a SIB byte, and %rbp indicates an -/// absolute immediate 32-bit address. -fn modrm_rm(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b00000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 00 Mod/RM byte, with a rip-relative displacement in 64-bit mode. Effective address -/// is calculated by adding displacement to 64-bit rip of next instruction. See intel Sw dev manual -/// section 2.2.1.6. -fn modrm_riprel(reg: RegUnit, sink: &mut CS) { - modrm_rm(0b101, reg, sink) -} - -/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit -/// displacement. -/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte. 
-fn modrm_disp8(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b01000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit -/// displacement. -/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte. -fn modrm_disp32(rm: RegUnit, reg: RegUnit, sink: &mut CS) { - let reg = reg as u8 & 7; - let rm = rm as u8 & 7; - let mut b = 0b10000000; - b |= reg << 3; - b |= rm; - sink.put1(b); -} - -/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present. -fn modrm_sib(reg: RegUnit, sink: &mut CS) { - modrm_rm(0b100, reg, sink); -} - -/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit -/// displacement are present. -fn modrm_sib_disp8(reg: RegUnit, sink: &mut CS) { - modrm_disp8(0b100, reg, sink); -} - -/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit -/// displacement are present. -fn modrm_sib_disp32(reg: RegUnit, sink: &mut CS) { - modrm_disp32(0b100, reg, sink); -} - -/// Emit a SIB byte with a base register and no scale+index. -fn sib_noindex(base: RegUnit, sink: &mut CS) { - let base = base as u8 & 7; - // SIB SS_III_BBB. - let mut b = 0b00_100_000; - b |= base; - sink.put1(b); -} - -/// Emit a SIB byte with a scale, base, and index. -fn sib(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) { - // SIB SS_III_BBB. - debug_assert_eq!(scale & !0x03, 0, "Scale out of range"); - let scale = scale & 3; - let index = index as u8 & 7; - let base = base as u8 & 7; - let b: u8 = (scale << 6) | (index << 3) | base; - sink.put1(b); -} - -/// Get the low 4 bits of an opcode for an integer condition code. -/// -/// Add this offset to a base opcode for: -/// -/// ---- 0x70: Short conditional branch. -/// 0x0f 0x80: Long conditional branch. -/// 0x0f 0x90: SetCC. 
-/// -fn icc2opc(cond: IntCC) -> u16 { - use crate::ir::condcodes::IntCC::*; - match cond { - Overflow => 0x0, - NotOverflow => 0x1, - UnsignedLessThan => 0x2, - UnsignedGreaterThanOrEqual => 0x3, - Equal => 0x4, - NotEqual => 0x5, - UnsignedLessThanOrEqual => 0x6, - UnsignedGreaterThan => 0x7, - // 0x8 = Sign. - // 0x9 = !Sign. - // 0xa = Parity even. - // 0xb = Parity odd. - SignedLessThan => 0xc, - SignedGreaterThanOrEqual => 0xd, - SignedLessThanOrEqual => 0xe, - SignedGreaterThan => 0xf, - } -} - -/// Get the low 4 bits of an opcode for a floating point condition code. -/// -/// The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this: -/// -/// ZPC OSA -/// UN 111 000 -/// GT 000 000 -/// LT 001 000 -/// EQ 100 000 -/// -/// Not all floating point condition codes are supported. -fn fcc2opc(cond: FloatCC) -> u16 { - use crate::ir::condcodes::FloatCC::*; - match cond { - Ordered => 0xb, // EQ|LT|GT => *np (P=0) - Unordered => 0xa, // UN => *p (P=1) - OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0), - UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1) - GreaterThan => 0x7, // GT => *a (C=0&Z=0) - GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0) - UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1) - UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1) - Equal | // EQ - NotEqual | // UN|LT|GT - LessThan | // LT - LessThanOrEqual | // LT|EQ - UnorderedOrGreaterThan | // UN|GT - UnorderedOrGreaterThanOrEqual // UN|GT|EQ - => panic!("{} not supported", cond), - } -} - -/// Emit a single-byte branch displacement to `destination`. -fn disp1(destination: Block, func: &Function, sink: &mut CS) { - let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1); - sink.put1(delta as u8); -} - -/// Emit a four-byte branch displacement to `destination`. 
-fn disp4(destination: Block, func: &Function, sink: &mut CS) { - let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4); - sink.put4(delta); -} - -/// Emit a four-byte displacement to jump table `jt`. -fn jt_disp4(jt: JumpTable, func: &Function, sink: &mut CS) { - let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4); - sink.put4(delta); - sink.reloc_jt(Reloc::X86PCRelRodata4, jt); -} - -/// Emit a four-byte displacement to `constant`. -fn const_disp4(constant: Constant, func: &Function, sink: &mut CS) { - let offset = func.dfg.constants.get_offset(constant); - let delta = offset.wrapping_sub(sink.offset() + 4); - sink.put4(delta); - sink.reloc_constant(Reloc::X86PCRelRodata4, offset); -} diff --git a/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs b/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs deleted file mode 100644 index 72890cffd9..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs +++ /dev/null @@ -1,1894 +0,0 @@ -//! Encoding tables for x86 ISAs. - -use super::registers::*; -use crate::bitset::BitSet; -use crate::cursor::{Cursor, FuncCursor}; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::condcodes::{FloatCC, IntCC}; -use crate::ir::types::*; -use crate::ir::{self, Function, Inst, InstBuilder, MemFlags}; -use crate::isa::constraints::*; -use crate::isa::enc_tables::*; -use crate::isa::encoding::base_size; -use crate::isa::encoding::{Encoding, RecipeSizing}; -use crate::isa::RegUnit; -use crate::isa::{self, TargetIsa}; -use crate::legalizer::expand_as_libcall; -use crate::predicates; -use crate::regalloc::RegDiversions; - -include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs")); -include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs")); - -/// Whether the REX prefix is needed for encoding extended registers (via REX.RXB). -/// -/// Normal x86 instructions have only 3 bits for encoding a register. -/// The REX prefix adds REX.R, REX,X, and REX.B bits, interpreted as fourth bits. 
-pub fn is_extended_reg(reg: RegUnit) -> bool { - // Extended registers have the fourth bit set. - reg as u8 & 0b1000 != 0 -} - -pub fn needs_sib_byte(reg: RegUnit) -> bool { - reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit -} -pub fn needs_offset(reg: RegUnit) -> bool { - reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit -} -pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool { - needs_sib_byte(reg) || needs_offset(reg) -} - -fn test_input( - op_index: usize, - inst: Inst, - divert: &RegDiversions, - func: &Function, - condition_func: fn(RegUnit) -> bool, -) -> bool { - let in_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations); - condition_func(in_reg) -} - -fn test_result( - result_index: usize, - inst: Inst, - divert: &RegDiversions, - func: &Function, - condition_func: fn(RegUnit) -> bool, -) -> bool { - let out_reg = divert.reg(func.dfg.inst_results(inst)[result_index], &func.locations); - condition_func(out_reg) -} - -fn size_plus_maybe_offset_for_inreg_0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_offset = test_input(0, inst, divert, func, needs_offset); - sizing.base_size + if needs_offset { 1 } else { 0 } -} -fn size_plus_maybe_offset_for_inreg_1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_offset = test_input(1, inst, divert, func, needs_offset); - sizing.base_size + if needs_offset { 1 } else { 0 } -} -fn size_plus_maybe_sib_for_inreg_0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib = test_input(0, inst, divert, func, needs_sib_byte); - sizing.base_size + if needs_sib { 1 } else { 0 } -} -fn size_plus_maybe_sib_for_inreg_1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib = test_input(1, inst, divert, 
func, needs_sib_byte); - sizing.base_size + if needs_sib { 1 } else { 0 } -} -fn size_plus_maybe_sib_or_offset_for_inreg_0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib_or_offset = test_input(0, inst, divert, func, needs_sib_byte_or_offset); - sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } -} -fn size_plus_maybe_sib_or_offset_for_inreg_1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - let needs_sib_or_offset = test_input(1, inst, divert, func, needs_sib_byte_or_offset); - sizing.base_size + if needs_sib_or_offset { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1) -/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB or offset. -fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_input(1, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func) - + if needs_rex { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1) -/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB. -fn size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. 
- let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_input(1, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_for_inreg_1(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first input register (inreg0) and first output -/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a -/// SIB or offset. -fn size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_result(0, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_or_offset_for_inreg_0(sizing, enc, inst, divert, func) - + if needs_rex { 1 } else { 0 } -} - -/// Calculates the size while inferring if the first input register (inreg0) and first output -/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a -/// SIB. -fn size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0( - sizing: &RecipeSizing, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_result(0, inst, divert, func, is_extended_reg); - size_plus_maybe_sib_for_inreg_0(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg. -/// -/// A REX prefix is known to be emitted if either: -/// 1. The EncodingBits specify that REX.W is to be set. -/// 2. Registers are used that require REX.R or REX.B bits for encoding. 
-fn size_with_inferred_rex_for_inreg0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on the second operand. -fn size_with_inferred_rex_for_inreg1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(1, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on the third operand. -fn size_with_inferred_rex_for_inreg2( - sizing: &RecipeSizing, - _: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(2, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, for use with two input registers. -/// -/// A REX prefix is known to be emitted if either: -/// 1. The EncodingBits specify that REX.W is to be set. -/// 2. Registers are used that require REX.R or REX.B bits for encoding. -fn size_with_inferred_rex_for_inreg0_inreg1( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. 
- let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_input(1, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on second and third operand. -fn size_with_inferred_rex_for_inreg1_inreg2( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(1, inst, divert, func, is_extended_reg) - || test_input(2, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on a single -/// input register and a single output register. -fn size_with_inferred_rex_for_inreg0_outreg0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(0, inst, divert, func, is_extended_reg) - || test_result(0, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, based on a single output register. -fn size_with_inferred_rex_for_outreg0( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_result(0, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV. -/// -/// CMOV uses 3 inputs, with the REX is inferred from reg1 and reg2. 
-fn size_with_inferred_rex_for_cmov( - sizing: &RecipeSizing, - _enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, -) -> u8 { - // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed. - let needs_rex = test_input(1, inst, divert, func, is_extended_reg) - || test_input(2, inst, divert, func, is_extended_reg); - sizing.base_size + if needs_rex { 1 } else { 0 } -} - -/// If the value's definition is a constant immediate, returns its unpacked value, or None -/// otherwise. -fn maybe_iconst_imm(pos: &FuncCursor, value: ir::Value) -> Option { - if let ir::ValueDef::Result(inst, _) = &pos.func.dfg.value_def(value) { - if let ir::InstructionData::UnaryImm { - opcode: ir::Opcode::Iconst, - imm, - } = &pos.func.dfg[*inst] - { - let value: i64 = (*imm).into(); - Some(value) - } else { - None - } - } else { - None - } -} - -/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`. -fn expand_sdivrem( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let (x, y, is_srem) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Sdiv, - args, - } => (args[0], args[1], false), - ir::InstructionData::Binary { - opcode: ir::Opcode::Srem, - args, - } => (args[0], args[1], true), - _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)), - }; - - let old_block = func.layout.pp_block(inst); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - pos.func.dfg.clear_results(inst); - - let avoid_div_traps = isa.flags().avoid_div_traps(); - - // If we can tolerate native division traps, sdiv doesn't need branching. 
- if !avoid_div_traps && !is_srem { - let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - pos.ins().with_result(result).x86_sdivmodx(x, xhi, y); - pos.remove_inst(); - return; - } - - // Try to remove checks if the input value is an immediate other than 0 or -1. For these two - // immediates, we'd ideally replace conditional traps by traps, but this requires more - // manipulation of the dfg/cfg, which is out of scope here. - let (could_be_zero, could_be_minus_one) = if let Some(imm) = maybe_iconst_imm(&pos, y) { - (imm == 0, imm == -1) - } else { - (true, true) - }; - - // Put in an explicit division-by-zero trap if the environment requires it. - if avoid_div_traps && could_be_zero { - pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); - } - - if !could_be_minus_one { - let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - let reuse = if is_srem { - [None, Some(result)] - } else { - [Some(result), None] - }; - pos.ins().with_results(reuse).x86_sdivmodx(x, xhi, y); - pos.remove_inst(); - return; - } - - // block handling the nominal case. - let nominal = pos.func.dfg.make_block(); - - // block handling the -1 divisor case. - let minus_one = pos.func.dfg.make_block(); - - // Final block with one argument representing the final result value. - let done = pos.func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - pos.func.dfg.attach_block_param(done, result); - - // Start by checking for a -1 divisor which needs to be handled specially. - let is_m1 = pos.ins().ifcmp_imm(y, -1); - pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]); - pos.ins().jump(nominal, &[]); - - // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division - // by zero. 
- pos.insert_block(nominal); - let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y); - let divres = if is_srem { rem } else { quot }; - pos.ins().jump(done, &[divres]); - - // Now deal with the -1 divisor case. - pos.insert_block(minus_one); - let m1_result = if is_srem { - // x % -1 = 0. - pos.ins().iconst(ty, 0) - } else { - // Explicitly check for overflow: Trap when x == INT_MIN. - debug_assert!(avoid_div_traps, "Native trapping divide handled above"); - let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1)); - pos.ins() - .trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow); - // x / -1 = -x. - pos.ins().irsub_imm(x, 0) - }; - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[m1_result]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, nominal); - cfg.recompute_block(pos.func, minus_one); - cfg.recompute_block(pos.func, done); -} - -/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`. -fn expand_udivrem( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let (x, y, is_urem) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Udiv, - args, - } => (args[0], args[1], false), - ir::InstructionData::Binary { - opcode: ir::Opcode::Urem, - args, - } => (args[0], args[1], true), - _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)), - }; - let avoid_div_traps = isa.flags().avoid_div_traps(); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - pos.func.dfg.clear_results(inst); - - // Put in an explicit division-by-zero trap if the environment requires it. 
- if avoid_div_traps { - let zero_check = if let Some(imm) = maybe_iconst_imm(&pos, y) { - // Ideally, we'd just replace the conditional trap with a trap when the immediate is - // zero, but this requires more manipulation of the dfg/cfg, which is out of scope - // here. - imm == 0 - } else { - true - }; - if zero_check { - pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); - } - } - - // Now it is safe to execute the `x86_udivmodx` instruction. - let xhi = pos.ins().iconst(ty, 0); - let reuse = if is_urem { - [None, Some(result)] - } else { - [Some(result), None] - }; - pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y); - pos.remove_inst(); -} - -/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax` -/// instructions. -fn expand_minmax( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmin, - args, - } => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor), - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmax, - args, - } => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band), - _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)), - }; - let old_block = func.layout.pp_block(inst); - - // We need to handle the following conditions, depending on how x and y compare: - // - // 1. LT or GT: The native `x86_opc` min/max instruction does what we need. - // 2. EQ: We need to use `bitwise_opc` to make sure that - // fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0. - // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical. - - // block handling case 1) where operands are ordered but not equal. - let one_block = func.dfg.make_block(); - - // block handling case 3) where one operand is NaN. - let uno_block = func.dfg.make_block(); - - // block that handles the unordered or equal cases 2) and 3). 
- let ueq_block = func.dfg.make_block(); - - // block handling case 2) where operands are ordered and equal. - let eq_block = func.dfg.make_block(); - - // Final block with one argument representing the final result value. - let done = func.dfg.make_block(); - - // The basic blocks are laid out to minimize branching for the common cases: - // - // 1) One branch not taken, one jump. - // 2) One branch taken. - // 3) Two branches taken, one jump. - - // Move the `inst` result value onto the `done` block. - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done, result); - - // Test for case 1) ordered and not equal. - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y); - pos.ins().brnz(cmp_ueq, ueq_block, &[]); - pos.ins().jump(one_block, &[]); - - // Handle the common ordered, not equal (LT|GT) case. - pos.insert_block(one_block); - let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0; - let one_result = pos.func.dfg.first_result(one_inst); - pos.ins().jump(done, &[one_result]); - - // Case 3) Unordered. - // We know that at least one operand is a NaN that needs to be propagated. We simply use an - // `fadd` instruction which has the same NaN propagation semantics. - pos.insert_block(uno_block); - let uno_result = pos.ins().fadd(x, y); - pos.ins().jump(done, &[uno_result]); - - // Case 2) or 3). - pos.insert_block(ueq_block); - // Test for case 3) (UN) one value is NaN. - // TODO: When we get support for flag values, we can reuse the above comparison. - let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y); - pos.ins().brnz(cmp_uno, uno_block, &[]); - pos.ins().jump(eq_block, &[]); - - // We are now in case 2) where x and y compare EQ. - // We need a bitwise operation to get the sign right. 
- pos.insert_block(eq_block); - let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0; - let bw_result = pos.func.dfg.first_result(bw_inst); - // This should become a fall-through for this second most common case. - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[bw_result]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, one_block); - cfg.recompute_block(pos.func, uno_block); - cfg.recompute_block(pos.func, ueq_block); - cfg.recompute_block(pos.func, eq_block); - cfg.recompute_block(pos.func, done); -} - -/// This legalization converts a minimum/maximum operation into a sequence that matches the -/// non-x86-friendly WebAssembly semantics of NaN handling. This logic is kept separate from -/// [expand_minmax] above (the scalar version) for code clarity. -fn expand_minmax_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let ty = func.dfg.ctrl_typevar(inst); - debug_assert!(ty.is_vector()); - let (x, y, x86_opcode, is_max) = match func.dfg[inst] { - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmin, - args, - } => (args[0], args[1], ir::Opcode::X86Fmin, false), - ir::InstructionData::Binary { - opcode: ir::Opcode::Fmax, - args, - } => (args[0], args[1], ir::Opcode::X86Fmax, true), - _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)), - }; - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // This sequence is complex due to how x86 handles NaNs and +0/-0. If x86 finds a NaN in - // either lane it returns the second operand; likewise, if both operands are in {+0.0, -0.0} - // it returns the second operand. To match the behavior of "return the minimum of the - // operands or a canonical NaN if either operand is NaN," we must compare in both - // directions. 
- let (forward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, x, y); - let forward = dfg.first_result(forward_inst); - let (backward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, y, x); - let backward = dfg.first_result(backward_inst); - - let (value, mask) = if is_max { - // For maximum: - // Find any differences between the forward and backward `max` operation. - let difference = pos.ins().bxor(forward, backward); - // Merge in the differences. - let propagate_nans_and_plus_zero = pos.ins().bor(backward, difference); - let value = pos.ins().fsub(propagate_nans_and_plus_zero, difference); - // Discover which lanes have NaNs in them. - let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, difference, value); - (value, find_nan_lanes_mask) - } else { - // For minimum: - // If either lane is a NaN, we want to use these bits, not the second operand bits. - let propagate_nans = pos.ins().bor(backward, forward); - // Find which lanes contain a NaN with an unordered comparison, filling the mask with - // 1s. - let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, forward, propagate_nans); - let bitcast_find_nan_lanes_mask = pos.ins().raw_bitcast(ty, find_nan_lanes_mask); - // Then flood the value lane with all 1s if that lane is a NaN. This causes all NaNs - // along this code path to be quieted and negative: after the upcoming shift and and_not, - // all upper bits (sign, exponent, and payload MSB) will be 1s. - let tmp = pos.ins().bor(propagate_nans, bitcast_find_nan_lanes_mask); - (tmp, bitcast_find_nan_lanes_mask) - }; - - // During this lowering we will need to know how many bits to shift by and what type to - // convert to when using an integer shift. Recall that an IEEE754 number looks like: - // `[sign bit] [exponent bits] [significand bits]` - // A quiet NaN has all exponent bits set to 1 and the most significant bit of the - // significand set to 1; a signaling NaN has the same exponent but the MSB of the - // significand is set to 0. 
The payload of the NaN is the remaining significand bits, and - // WebAssembly assumes a canonical NaN is quiet and has 0s in its payload. To compute this - // canonical NaN, we create a mask for the top 10 bits on F32X4 (1 sign + 8 exp. + 1 MSB - // sig.) and the top 13 bits on F64X2 (1 sign + 11 exp. + 1 MSB sig.). This means that all - // NaNs produced with the mask will be negative (`-NaN`) which is allowed by the sign - // non-determinism in the spec: https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0 - let (shift_by, ty_as_int) = match ty { - F32X4 => (10, I32X4), - F64X2 => (13, I64X2), - _ => unimplemented!("this legalization only understands 128-bit floating point types"), - }; - - // In order to clear the NaN payload for canonical NaNs, we shift right the NaN lanes (all - // 1s) leaving 0s in the top bits. Remember that non-NaN lanes are all 0s so this has - // little effect. - let mask_as_int = pos.ins().raw_bitcast(ty_as_int, mask); - let shift_mask = pos.ins().ushr_imm(mask_as_int, shift_by); - let shift_mask_as_float = pos.ins().raw_bitcast(ty, shift_mask); - - // Finally, we replace the value with `value & ~shift_mask`. For non-NaN lanes, this is - // equivalent to `... & 1111...` but for NaN lanes this will only have 1s in the top bits, - // clearing the payload. - pos.func - .dfg - .replace(inst) - .band_not(value, shift_mask_as_float); -} - -/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to -/// i64 with a pattern, the rest needs more code. -/// -/// Note that this is the scalar implementation; for the vector implemenation see -/// [expand_fcvt_from_uint_vector]. 
-fn expand_fcvt_from_uint( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let x; - match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtFromUint, - arg, - } => x = arg, - _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)), - } - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Conversion from an unsigned int smaller than 64bit is easy on x86-64. - match xty { - ir::types::I8 | ir::types::I16 | ir::types::I32 => { - // TODO: This should be guarded by an ISA check. - let wide = pos.ins().uextend(ir::types::I64, x); - pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide); - return; - } - ir::types::I64 => {} - _ => unimplemented!(), - } - - let old_block = pos.func.layout.pp_block(inst); - - // block handling the case where x >= 0. - let poszero_block = pos.func.dfg.make_block(); - - // block handling the case where x < 0. - let neg_block = pos.func.dfg.make_block(); - - // Final block with one argument representing the final result value. - let done = pos.func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - pos.func.dfg.clear_results(inst); - pos.func.dfg.attach_block_param(done, result); - - // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction. - let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0); - pos.ins().brnz(is_neg, neg_block, &[]); - pos.ins().jump(poszero_block, &[]); - - // Easy case: just use a signed conversion. - pos.insert_block(poszero_block); - let posres = pos.ins().fcvt_from_sint(ty, x); - pos.ins().jump(done, &[posres]); - - // Now handle the negative case. - pos.insert_block(neg_block); - - // Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it - // back up on the FP side. 
- let ihalf = pos.ins().ushr_imm(x, 1); - let lsb = pos.ins().band_imm(x, 1); - let ifinal = pos.ins().bor(ihalf, lsb); - let fhalf = pos.ins().fcvt_from_sint(ty, ifinal); - let negres = pos.ins().fadd(fhalf, fhalf); - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[negres]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, poszero_block); - cfg.recompute_block(pos.func, neg_block); - cfg.recompute_block(pos.func, done); -} - -/// To convert packed unsigned integers to their float equivalents, we must legalize to a special -/// AVX512 instruction (using MCSR rounding) or use a long sequence of instructions. This logic is -/// separate from [expand_fcvt_from_uint] above (the scalar version), only due to how the transform -/// groups are set up; TODO if we change the SIMD legalization groups, then this logic could be -/// merged into [expand_fcvt_from_uint] (see https://github.com/bytecodealliance/wasmtime/issues/1745). -fn expand_fcvt_from_uint_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtFromUint, - arg, - } = pos.func.dfg[inst] - { - let controlling_type = pos.func.dfg.ctrl_typevar(inst); - if controlling_type == F32X4 { - debug_assert_eq!(pos.func.dfg.value_type(arg), I32X4); - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.use_avx512vl_simd() || x86_isa.isa_flags.use_avx512f_simd() { - // If we have certain AVX512 features, we can lower this instruction simply. 
- pos.func.dfg.replace(inst).x86_vcvtudq2ps(arg); - } else { - // Otherwise, we default to a very lengthy SSE4.1-compatible sequence: PXOR, - // PBLENDW, PSUB, CVTDQ2PS, PSRLD, CVTDQ2PS, ADDPS, ADDPS - let bitcast_arg = pos.ins().raw_bitcast(I16X8, arg); - let zero_constant = pos.func.dfg.constants.insert(vec![0; 16].into()); - let zero = pos.ins().vconst(I16X8, zero_constant); - let low = pos.ins().x86_pblendw(zero, bitcast_arg, 0x55); - let bitcast_low = pos.ins().raw_bitcast(I32X4, low); - let high = pos.ins().isub(arg, bitcast_low); - let convert_low = pos.ins().fcvt_from_sint(F32X4, bitcast_low); - let shift_high = pos.ins().ushr_imm(high, 1); - let convert_high = pos.ins().fcvt_from_sint(F32X4, shift_high); - let double_high = pos.ins().fadd(convert_high, convert_high); - pos.func.dfg.replace(inst).fadd(double_high, convert_low); - } - } else { - unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None)) - } - } -} - -fn expand_fcvt_to_sint( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToSint, - arg, - } => arg, - _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)), - }; - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // Final block after the bad value checks. - let done = func.dfg.make_block(); - - // block for checking failure cases. - let maybe_trap_block = func.dfg.make_block(); - - // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow. - // It produces an INT_MIN result instead. 
- func.dfg.replace(inst).x86_cvtt2si(ty, x); - - let mut pos = FuncCursor::new(func).after_inst(inst); - pos.use_srcloc(inst); - - let is_done = pos - .ins() - .icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1)); - pos.ins().brnz(is_done, done, &[]); - pos.ins().jump(maybe_trap_block, &[]); - - // We now have the following possibilities: - // - // 1. INT_MIN was actually the correct conversion result. - // 2. The input was NaN -> trap bad_toint - // 3. The input was out of range -> trap int_ovf - // - pos.insert_block(maybe_trap_block); - - // Check for NaN. - let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); - pos.ins() - .trapnz(is_nan, ir::TrapCode::BadConversionToInteger); - - // Check for case 1: INT_MIN is the correct result. - // Determine the smallest floating point number that would convert to INT_MIN. - let mut overflow_cc = FloatCC::LessThan; - let output_bits = ty.lane_bits(); - let flimit = match xty { - ir::types::F32 => - // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f32const(if output_bits < 32 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee32::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee32::pow2(output_bits - 1).neg() - }) - } - ir::types::F64 => - // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f64const(if output_bits < 64 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee64::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee64::pow2(output_bits - 1).neg() - }) - } - _ => panic!("Can't convert {}", xty), - }; - let overflow = pos.ins().fcmp(overflow_cc, x, flimit); - pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow); - - // Finally, we could have a positive value that is too large. 
- let fzero = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), - ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), - _ => panic!("Can't convert {}", xty), - }; - let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero); - pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow); - - pos.ins().jump(done, &[]); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, maybe_trap_block); - cfg.recompute_block(pos.func, done); -} - -fn expand_fcvt_to_sint_sat( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToSintSat, - arg, - } => arg, - _ => panic!( - "Need fcvt_to_sint_sat: {}", - func.dfg.display_inst(inst, None) - ), - }; - - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // Final block after the bad value checks. - let done_block = func.dfg.make_block(); - let intmin_block = func.dfg.make_block(); - let minsat_block = func.dfg.make_block(); - let maxsat_block = func.dfg.make_block(); - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done_block, result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or - // overflow. It produces an INT_MIN result instead. - let cvtt2si = pos.ins().x86_cvtt2si(ty, x); - - let is_done = pos - .ins() - .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1)); - pos.ins().brnz(is_done, done_block, &[cvtt2si]); - pos.ins().jump(intmin_block, &[]); - - // We now have the following possibilities: - // - // 1. INT_MIN was actually the correct conversion result. - // 2. 
The input was NaN -> replace the result value with 0. - // 3. The input was out of range -> saturate the result to the min/max value. - pos.insert_block(intmin_block); - - // Check for NaN, which is truncated to 0. - let zero = pos.ins().iconst(ty, 0); - let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); - pos.ins().brnz(is_nan, done_block, &[zero]); - pos.ins().jump(minsat_block, &[]); - - // Check for case 1: INT_MIN is the correct result. - // Determine the smallest floating point number that would convert to INT_MIN. - pos.insert_block(minsat_block); - let mut overflow_cc = FloatCC::LessThan; - let output_bits = ty.lane_bits(); - let flimit = match xty { - ir::types::F32 => - // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f32const(if output_bits < 32 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee32::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee32::pow2(output_bits - 1).neg() - }) - } - ir::types::F64 => - // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - { - pos.ins().f64const(if output_bits < 64 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee64::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee64::pow2(output_bits - 1).neg() - }) - } - _ => panic!("Can't convert {}", xty), - }; - - let overflow = pos.ins().fcmp(overflow_cc, x, flimit); - let min_imm = match ty { - ir::types::I32 => i32::min_value() as i64, - ir::types::I64 => i64::min_value(), - _ => panic!("Don't know the min value for {}", ty), - }; - let min_value = pos.ins().iconst(ty, min_imm); - pos.ins().brnz(overflow, done_block, &[min_value]); - pos.ins().jump(maxsat_block, &[]); - - // Finally, we could have a positive value that is too large. 
- pos.insert_block(maxsat_block); - let fzero = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), - ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), - _ => panic!("Can't convert {}", xty), - }; - - let max_imm = match ty { - ir::types::I32 => i32::max_value() as i64, - ir::types::I64 => i64::max_value(), - _ => panic!("Don't know the max value for {}", ty), - }; - let max_value = pos.ins().iconst(ty, max_imm); - - let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero); - pos.ins().brnz(overflow, done_block, &[max_value]); - - // Recycle the original instruction. - pos.func.dfg.replace(inst).jump(done_block, &[cvtt2si]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done_block); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, intmin_block); - cfg.recompute_block(pos.func, minsat_block); - cfg.recompute_block(pos.func, maxsat_block); - cfg.recompute_block(pos.func, done_block); -} - -/// This legalization converts a vector of 32-bit floating point lanes to signed integer lanes -/// using CVTTPS2DQ (see encoding of `x86_cvtt2si`). This logic is separate from [expand_fcvt_to_sint_sat] -/// above (the scalar version), only due to how the transform groups are set up; TODO if we change -/// the SIMD legalization groups, then this logic could be merged into [expand_fcvt_to_sint_sat] -/// (see https://github.com/bytecodealliance/wasmtime/issues/1745). 
-fn expand_fcvt_to_sint_sat_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToSintSat, - arg, - } = pos.func.dfg[inst] - { - let controlling_type = pos.func.dfg.ctrl_typevar(inst); - if controlling_type == I32X4 { - debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4); - // We must both quiet any NaNs--setting that lane to 0--and saturate any - // lanes that might overflow during conversion to the highest/lowest signed integer - // allowed in that lane. - - // Saturate NaNs: `fcmp eq` will not match if a lane contains a NaN. We use ANDPS to - // avoid doing the comparison twice (we need the zeroed lanes to find differences). - let zeroed_nans = pos.ins().fcmp(FloatCC::Equal, arg, arg); - let zeroed_nans_bitcast = pos.ins().raw_bitcast(F32X4, zeroed_nans); - let zeroed_nans_copy = pos.ins().band(arg, zeroed_nans_bitcast); - - // Find differences with the zeroed lanes (we will only use the MSB: 1 if positive or - // NaN, 0 otherwise). - let differences = pos.ins().bxor(zeroed_nans_bitcast, arg); - let differences_bitcast = pos.ins().raw_bitcast(I32X4, differences); - - // Convert the numeric lanes. CVTTPS2DQ will mark overflows with 0x80000000 (MSB set). - let converted = pos.ins().x86_cvtt2si(I32X4, zeroed_nans_copy); - - // Create a mask of all 1s only on positive overflow, 0s otherwise. This uses the MSB - // of `differences` (1 when positive or NaN) and the MSB of `converted` (1 on positive - // overflow). - let tmp = pos.ins().band(differences_bitcast, converted); - let mask = pos.ins().sshr_imm(tmp, 31); - - // Apply the mask to create 0x7FFFFFFF for positive overflow. XOR of all 0s (all other - // cases) has no effect. 
- pos.func.dfg.replace(inst).bxor(converted, mask); - } else { - unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None)) - } - } -} - -fn expand_fcvt_to_uint( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToUint, - arg, - } => arg, - _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)), - }; - - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // block handle numbers < 2^(N-1). - let below_uint_max_block = func.dfg.make_block(); - - // block handle numbers < 0. - let below_zero_block = func.dfg.make_block(); - - // block handling numbers >= 2^(N-1). - let large = func.dfg.make_block(); - - // Final block after the bad value checks. - let done = func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done, result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in - // the destination integer type. - let pow2nm1 = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)), - ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)), - _ => panic!("Can't convert {}", xty), - }; - let is_large = pos.ins().ffcmp(x, pow2nm1); - pos.ins() - .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); - pos.ins().jump(below_uint_max_block, &[]); - - // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the - // previous comparison. 
- pos.insert_block(below_uint_max_block); - pos.ins().trapff( - FloatCC::Unordered, - is_large, - ir::TrapCode::BadConversionToInteger, - ); - - // Now we know that x < 2^(N-1) and not NaN. - let sres = pos.ins().x86_cvtt2si(ty, x); - let is_neg = pos.ins().ifcmp_imm(sres, 0); - pos.ins() - .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); - pos.ins().jump(below_zero_block, &[]); - - pos.insert_block(below_zero_block); - pos.ins().trap(ir::TrapCode::IntegerOverflow); - - // Handle the case where x >= 2^(N-1) and not NaN. - pos.insert_block(large); - let adjx = pos.ins().fsub(x, pow2nm1); - let lres = pos.ins().x86_cvtt2si(ty, adjx); - let is_neg = pos.ins().ifcmp_imm(lres, 0); - pos.ins() - .trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow); - let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[lfinal]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, below_uint_max_block); - cfg.recompute_block(pos.func, below_zero_block); - cfg.recompute_block(pos.func, large); - cfg.recompute_block(pos.func, done); -} - -fn expand_fcvt_to_uint_sat( - inst: ir::Inst, - func: &mut ir::Function, - cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - use crate::ir::immediates::{Ieee32, Ieee64}; - - let x = match func.dfg[inst] { - ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToUintSat, - arg, - } => arg, - _ => panic!( - "Need fcvt_to_uint_sat: {}", - func.dfg.display_inst(inst, None) - ), - }; - - let old_block = func.layout.pp_block(inst); - let xty = func.dfg.value_type(x); - let result = func.dfg.first_result(inst); - let ty = func.dfg.value_type(result); - - // block handle numbers < 2^(N-1). 
- let below_pow2nm1_or_nan_block = func.dfg.make_block(); - let below_pow2nm1_block = func.dfg.make_block(); - - // block handling numbers >= 2^(N-1). - let large = func.dfg.make_block(); - - // block handling numbers < 2^N. - let uint_large_block = func.dfg.make_block(); - - // Final block after the bad value checks. - let done = func.dfg.make_block(); - - // Move the `inst` result value onto the `done` block. - func.dfg.clear_results(inst); - func.dfg.attach_block_param(done, result); - - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in - // the destination integer type. - let pow2nm1 = match xty { - ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)), - ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)), - _ => panic!("Can't convert {}", xty), - }; - let zero = pos.ins().iconst(ty, 0); - let is_large = pos.ins().ffcmp(x, pow2nm1); - pos.ins() - .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); - pos.ins().jump(below_pow2nm1_or_nan_block, &[]); - - // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison. - pos.insert_block(below_pow2nm1_or_nan_block); - pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]); - pos.ins().jump(below_pow2nm1_block, &[]); - - // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're - // done; otherwise saturate to the minimum unsigned value, that is 0. - pos.insert_block(below_pow2nm1_block); - let sres = pos.ins().x86_cvtt2si(ty, x); - let is_neg = pos.ins().ifcmp_imm(sres, 0); - pos.ins() - .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); - pos.ins().jump(done, &[zero]); - - // Handle the case where x >= 2^(N-1) and not NaN. 
- pos.insert_block(large); - let adjx = pos.ins().fsub(x, pow2nm1); - let lres = pos.ins().x86_cvtt2si(ty, adjx); - let max_value = pos.ins().iconst( - ty, - match ty { - ir::types::I32 => u32::max_value() as i64, - ir::types::I64 => u64::max_value() as i64, - _ => panic!("Can't convert {}", ty), - }, - ); - let is_neg = pos.ins().ifcmp_imm(lres, 0); - pos.ins() - .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]); - pos.ins().jump(uint_large_block, &[]); - - pos.insert_block(uint_large_block); - let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); - - // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[lfinal]); - - // Finally insert a label for the completion. - pos.next_inst(); - pos.insert_block(done); - - cfg.recompute_block(pos.func, old_block); - cfg.recompute_block(pos.func, below_pow2nm1_or_nan_block); - cfg.recompute_block(pos.func, below_pow2nm1_block); - cfg.recompute_block(pos.func, large); - cfg.recompute_block(pos.func, uint_large_block); - cfg.recompute_block(pos.func, done); -} - -// Lanes of an I32x4 filled with the max signed integer values converted to an F32x4. -static MAX_SIGNED_I32X4S_AS_F32X4S: [u8; 16] = [ - 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, -]; - -/// This legalization converts a vector of 32-bit floating point lanes to unsigned integer lanes -/// using a long sequence of NaN quieting and truncation. This logic is separate from -/// [expand_fcvt_to_uint_sat] above (the scalar version), only due to how the transform groups are -/// set up; TODO if we change the SIMD legalization groups, then this logic could be merged into -/// [expand_fcvt_to_uint_sat] (see https://github.com/bytecodealliance/wasmtime/issues/1745). 
-fn expand_fcvt_to_uint_sat_vector( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::FcvtToUintSat, - arg, - } = pos.func.dfg[inst] - { - let controlling_type = pos.func.dfg.ctrl_typevar(inst); - if controlling_type == I32X4 { - debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4); - // We must both quiet any NaNs--setting that lane to 0--and saturate any - // lanes that might overflow during conversion to the highest/lowest integer - // allowed in that lane. - let zeroes_constant = pos.func.dfg.constants.insert(vec![0x00; 16].into()); - let max_signed_constant = pos - .func - .dfg - .constants - .insert(MAX_SIGNED_I32X4S_AS_F32X4S.as_ref().into()); - let zeroes = pos.ins().vconst(F32X4, zeroes_constant); - let max_signed = pos.ins().vconst(F32X4, max_signed_constant); - // Clamp the input to 0 for negative floating point numbers. TODO we need to - // convert NaNs to 0 but this doesn't do that? - let ge_zero = pos.ins().x86_fmax(arg, zeroes); - // Find lanes that exceed the max signed value that CVTTPS2DQ knows how to convert. - // For floating point numbers above this, CVTTPS2DQ returns the undefined value - // 0x80000000. - let minus_max_signed = pos.ins().fsub(ge_zero, max_signed); - let le_max_signed = - pos.ins() - .fcmp(FloatCC::LessThanOrEqual, max_signed, minus_max_signed); - // Identify lanes that have minus_max_signed > max_signed || minus_max_signed < 0. - // These lanes have the MSB set to 1 after the XOR. We are trying to calculate a - // valid, in-range addend. 
- let minus_max_signed_as_int = pos.ins().x86_cvtt2si(I32X4, minus_max_signed); - let le_max_signed_as_int = pos.ins().raw_bitcast(I32X4, le_max_signed); - let difference = pos - .ins() - .bxor(minus_max_signed_as_int, le_max_signed_as_int); - // Calculate amount to add above 0x7FFFFFF, zeroing out any lanes identified - // previously (MSB set to 1). - let zeroes_as_int = pos.ins().raw_bitcast(I32X4, zeroes); - let addend = pos.ins().x86_pmaxs(difference, zeroes_as_int); - // Convert the original clamped number to an integer and add back in the addend - // (the part of the value above 0x7FFFFFF, since CVTTPS2DQ overflows with these). - let converted = pos.ins().x86_cvtt2si(I32X4, ge_zero); - pos.func.dfg.replace(inst).iadd(converted, addend); - } else { - unreachable!( - "{} should not be legalized in expand_fcvt_to_uint_sat_vector", - pos.func.dfg.display_inst(inst, None) - ) - } - } -} - -/// Convert shuffle instructions. -fn convert_shuffle( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] { - // A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1 - // in the most significant position zeroes the lane. - let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b }; - - // We only have to worry about aliasing here because copies will be introduced later (in - // regalloc). - let a = pos.func.dfg.resolve_aliases(args[0]); - let b = pos.func.dfg.resolve_aliases(args[1]); - let mask = pos - .func - .dfg - .immediates - .get(mask) - .expect("The shuffle immediate should have been recorded before this point") - .clone(); - if a == b { - // PSHUFB the first argument (since it is the same as the second). 
- let constructed_mask = mask - .iter() - // If the mask is greater than 15 it still may be referring to a lane in b. - .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b }) - .map(zero_unknown_lane_index) - .collect(); - let handle = pos.func.dfg.constants.insert(constructed_mask); - // Move the built mask into another XMM register. - let a_type = pos.func.dfg.value_type(a); - let mask_value = pos.ins().vconst(a_type, handle); - // Shuffle the single incoming argument. - pos.func.dfg.replace(inst).x86_pshufb(a, mask_value); - } else { - // PSHUFB the first argument, placing zeroes for unused lanes. - let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect(); - let handle = pos.func.dfg.constants.insert(constructed_mask); - // Move the built mask into another XMM register. - let a_type = pos.func.dfg.value_type(a); - let mask_value = pos.ins().vconst(a_type, handle); - // Shuffle the first argument. - let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value); - - // PSHUFB the second argument, placing zeroes for unused lanes. - let constructed_mask = mask - .iter() - .map(|b| b.wrapping_sub(16)) - .map(zero_unknown_lane_index) - .collect(); - let handle = pos.func.dfg.constants.insert(constructed_mask); - // Move the built mask into another XMM register. - let b_type = pos.func.dfg.value_type(b); - let mask_value = pos.ins().vconst(b_type, handle); - // Shuffle the second argument. - let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value); - - // OR the vectors together to form the final shuffled value. 
- pos.func - .dfg - .replace(inst) - .bor(shuffled_first_arg, shuffled_second_arg); - - // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB - }; - } -} - -/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF -/// extractlane instruction -fn convert_extractlane( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::BinaryImm8 { - opcode: ir::Opcode::Extractlane, - arg, - imm: lane, - } = pos.func.dfg[inst] - { - // NOTE: the following legalization assumes that the upper bits of the XMM register do - // not need to be zeroed during extractlane. - let value_type = pos.func.dfg.value_type(arg); - if value_type.lane_type().is_float() { - // Floats are already in XMM registers and can stay there. - let shuffled = if lane != 0 { - // Replace the extractlane with a PSHUFD to get the float in the right place. - match value_type { - F32X4 => { - // Move the selected lane to the 0 lane. - let shuffle_mask: u8 = 0b00_00_00_00 | lane; - pos.ins().x86_pshufd(arg, shuffle_mask) - } - F64X2 => { - assert_eq!(lane, 1); - // Because we know the lane == 1, we move the upper 64 bits to the lower - // 64 bits, leaving the top 64 bits as-is. - let shuffle_mask = 0b11_10_11_10; - let bitcast = pos.ins().raw_bitcast(F32X4, arg); - pos.ins().x86_pshufd(bitcast, shuffle_mask) - } - _ => unreachable!(), - } - } else { - // Remove the extractlane instruction, leaving the float where it is. - arg - }; - // Then we must bitcast to the right type. - pos.func - .dfg - .replace(inst) - .raw_bitcast(value_type.lane_type(), shuffled); - } else { - // For non-floats, lower with the usual PEXTR* instruction. 
- pos.func.dfg.replace(inst).x86_pextr(arg, lane); - } - } -} - -/// Because floats exist in XMM registers, we can keep them there when executing a CLIF -/// insertlane instruction -fn convert_insertlane( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::TernaryImm8 { - opcode: ir::Opcode::Insertlane, - args: [vector, replacement], - imm: lane, - } = pos.func.dfg[inst] - { - let value_type = pos.func.dfg.value_type(vector); - if value_type.lane_type().is_float() { - // Floats are already in XMM registers and can stay there. - match value_type { - F32X4 => { - assert!(lane <= 3); - let immediate = 0b00_00_00_00 | lane << 4; - // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane - // shifted into bits 5:6). - pos.func - .dfg - .replace(inst) - .x86_insertps(vector, replacement, immediate) - } - F64X2 => { - let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types - if lane == 0 { - // Move the lowest quadword in replacement to vector without changing - // the upper bits. - pos.func - .dfg - .replace(inst) - .x86_movsd(vector, replacement_as_vector) - } else { - assert_eq!(lane, 1); - // Move the low 64 bits of replacement vector to the high 64 bits of the - // vector. - pos.func - .dfg - .replace(inst) - .x86_movlhps(vector, replacement_as_vector) - } - } - _ => unreachable!(), - }; - } else { - // For non-floats, lower with the usual PINSR* instruction. - pos.func - .dfg - .replace(inst) - .x86_pinsr(vector, replacement, lane); - } - } -} - -/// For SIMD or scalar integer negation, convert `ineg` to `vconst + isub` or `iconst + isub`. 
-fn convert_ineg( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Unary { - opcode: ir::Opcode::Ineg, - arg, - } = pos.func.dfg[inst] - { - let value_type = pos.func.dfg.value_type(arg); - let zero_value = if value_type.is_vector() && value_type.lane_type().is_int() { - let zero_immediate = pos.func.dfg.constants.insert(vec![0; 16].into()); - pos.ins().vconst(value_type, zero_immediate) // this should be legalized to a PXOR - } else if value_type.is_int() { - pos.ins().iconst(value_type, 0) - } else { - panic!("Can't convert ineg of type {}", value_type) - }; - pos.func.dfg.replace(inst).isub(zero_value, arg); - } else { - unreachable!() - } -} - -fn expand_dword_to_xmm<'f>( - pos: &mut FuncCursor<'_>, - arg: ir::Value, - arg_type: ir::Type, -) -> ir::Value { - if arg_type == I64 { - let (arg_lo, arg_hi) = pos.ins().isplit(arg); - let arg = pos.ins().scalar_to_vector(I32X4, arg_lo); - let arg = pos.ins().insertlane(arg, arg_hi, 1); - let arg = pos.ins().raw_bitcast(I64X2, arg); - arg - } else { - pos.ins().bitcast(I64X2, arg) - } -} - -fn contract_dword_from_xmm<'f>( - pos: &mut FuncCursor<'f>, - inst: ir::Inst, - ret: ir::Value, - ret_type: ir::Type, -) { - if ret_type == I64 { - let ret = pos.ins().raw_bitcast(I32X4, ret); - let ret_lo = pos.ins().extractlane(ret, 0); - let ret_hi = pos.ins().extractlane(ret, 1); - pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi); - } else { - let ret = pos.ins().extractlane(ret, 0); - pos.func.dfg.replace(inst).ireduce(ret_type, ret); - } -} - -// Masks for i8x16 unsigned right shift. 
-static USHR_MASKS: [u8; 128] = [ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, - 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, - 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, - 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, -]; - -// Convert a vector unsigned right shift. x86 has implementations for i16x8 and up (see `x86_pslr`), -// but for i8x16 we translate the shift to a i16x8 shift and mask off the upper bits. This same -// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex` -// has a varargs field that we can't modify with the CDSL in legalize.rs. -fn convert_ushr( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Ushr, - args: [arg0, arg1], - } = pos.func.dfg[inst] - { - // Note that for Wasm, the bounding of the shift index has happened during translation - let arg0_type = pos.func.dfg.value_type(arg0); - let arg1_type = pos.func.dfg.value_type(arg1); - assert!(!arg1_type.is_vector() && arg1_type.is_int()); - - // TODO it may be more clear to use scalar_to_vector here; the current issue is that - // scalar_to_vector has the restriction that the vector produced has a matching lane size - // (e.g. 
i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 -> - // i64x2). This matters because for some reason x86_psrl only allows i64x2 as the shift - // index type--this could be relaxed since it is not really meaningful. - let shift_index = pos.ins().bitcast(I64X2, arg1); - - if arg0_type == I8X16 { - // First, shift the vector using an I16X8 shift. - let bitcasted = pos.ins().raw_bitcast(I16X8, arg0); - let shifted = pos.ins().x86_psrl(bitcasted, shift_index); - let shifted = pos.ins().raw_bitcast(I8X16, shifted); - - // Then, fixup the even lanes that have incorrect upper bits. This uses the 128 mask - // bytes as a table that we index into. It is a substantial code-size increase but - // reduces the instruction count slightly. - let masks = pos.func.dfg.constants.insert(USHR_MASKS.as_ref().into()); - let mask_address = pos.ins().const_addr(isa.pointer_type(), masks); - let mask_offset = pos.ins().ishl_imm(arg1, 4); - let mask = - pos.ins() - .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0); - pos.func.dfg.replace(inst).band(shifted, mask); - } else if arg0_type.is_vector() { - // x86 has encodings for these shifts. - pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index); - } else if arg0_type == I64 { - // 64 bit shifts need to be legalized on x86_32. - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.has_sse41() { - // if we have pinstrq/pextrq (SSE 4.1), legalize to that - let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psrl(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); - } else { - // otherwise legalize to libcall - expand_as_libcall(inst, func, isa); - } - } else { - // Everything else should be already legal. - unreachable!() - } - } -} - -// Masks for i8x16 left shift. 
-static SHL_MASKS: [u8; 128] = [ - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, - 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, - 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, - 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, - 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, - 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, -]; - -// Convert a vector left shift. x86 has implementations for i16x8 and up (see `x86_psll`), -// but for i8x16 we translate the shift to a i16x8 shift and mask off the lower bits. This same -// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex` -// has a varargs field that we can't modify with the CDSL in legalize.rs. -fn convert_ishl( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Ishl, - args: [arg0, arg1], - } = pos.func.dfg[inst] - { - // Note that for Wasm, the bounding of the shift index has happened during translation - let arg0_type = pos.func.dfg.value_type(arg0); - let arg1_type = pos.func.dfg.value_type(arg1); - assert!(!arg1_type.is_vector() && arg1_type.is_int()); - - // TODO it may be more clear to use scalar_to_vector here; the current issue is that - // scalar_to_vector has the restriction that the vector produced has a matching lane size - // (e.g. 
i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 -> - // i64x2). This matters because for some reason x86_psrl only allows i64x2 as the shift - // index type--this could be relaxed since it is not really meaningful. - let shift_index = pos.ins().bitcast(I64X2, arg1); - - if arg0_type == I8X16 { - // First, shift the vector using an I16X8 shift. - let bitcasted = pos.ins().raw_bitcast(I16X8, arg0); - let shifted = pos.ins().x86_psll(bitcasted, shift_index); - let shifted = pos.ins().raw_bitcast(I8X16, shifted); - - // Then, fixup the even lanes that have incorrect lower bits. This uses the 128 mask - // bytes as a table that we index into. It is a substantial code-size increase but - // reduces the instruction count slightly. - let masks = pos.func.dfg.constants.insert(SHL_MASKS.as_ref().into()); - let mask_address = pos.ins().const_addr(isa.pointer_type(), masks); - let mask_offset = pos.ins().ishl_imm(arg1, 4); - let mask = - pos.ins() - .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0); - pos.func.dfg.replace(inst).band(shifted, mask); - } else if arg0_type.is_vector() { - // x86 has encodings for these shifts. - pos.func.dfg.replace(inst).x86_psll(arg0, shift_index); - } else if arg0_type == I64 { - // 64 bit shifts need to be legalized on x86_32. - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.has_sse41() { - // if we have pinstrq/pextrq (SSE 4.1), legalize to that - let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type); - let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type); - let shifted = pos.ins().x86_psll(value, amount); - contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type); - } else { - // otherwise legalize to libcall - expand_as_libcall(inst, func, isa); - } - } else { - // Everything else should be already legal. 
- unreachable!() - } - } -} - -/// Convert an imul.i64x2 to a valid code sequence on x86, first with AVX512 and then with SSE2. -fn convert_i64x2_imul( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - let mut pos = FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Imul, - args: [arg0, arg1], - } = pos.func.dfg[inst] - { - let ty = pos.func.dfg.ctrl_typevar(inst); - if ty == I64X2 { - let x86_isa = isa - .as_any() - .downcast_ref::() - .expect("the target ISA must be x86 at this point"); - if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() { - // If we have certain AVX512 features, we can lower this instruction simply. - pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1); - } else { - // Otherwise, we default to a very lengthy SSE2-compatible sequence. It splits each - // 64-bit lane into 32-bit high and low sections using shifting and then performs - // the following arithmetic per lane: with arg0 = concat(high0, low0) and arg1 = - // concat(high1, low1), calculate (high0 * low1) + (high1 * low0) + (low0 * low1). 
- let high0 = pos.ins().ushr_imm(arg0, 32); - let mul0 = pos.ins().x86_pmuludq(high0, arg1); - let high1 = pos.ins().ushr_imm(arg1, 32); - let mul1 = pos.ins().x86_pmuludq(high1, arg0); - let addhigh = pos.ins().iadd(mul0, mul1); - let high = pos.ins().ishl_imm(addhigh, 32); - let low = pos.ins().x86_pmuludq(arg0, arg1); - pos.func.dfg.replace(inst).iadd(low, high); - } - } else { - unreachable!( - "{} should be encodable; it cannot be legalized by convert_i64x2_imul", - pos.func.dfg.display_inst(inst, None) - ); - } - } -} - -fn expand_tls_value( - inst: ir::Inst, - func: &mut ir::Function, - _cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) { - use crate::settings::TlsModel; - - assert!( - isa.triple().architecture == target_lexicon::Architecture::X86_64, - "Not yet implemented for {:?}", - isa.triple(), - ); - - if let ir::InstructionData::UnaryGlobalValue { - opcode: ir::Opcode::TlsValue, - global_value, - } = func.dfg[inst] - { - let ctrl_typevar = func.dfg.ctrl_typevar(inst); - assert_eq!(ctrl_typevar, ir::types::I64); - - match isa.flags().tls_model() { - TlsModel::None => panic!("tls_model flag is not set."), - TlsModel::ElfGd => { - func.dfg.replace(inst).x86_elf_tls_get_addr(global_value); - } - TlsModel::Macho => { - func.dfg.replace(inst).x86_macho_tls_get_addr(global_value); - } - model => unimplemented!("tls_value for tls model {:?}", model), - } - } else { - unreachable!(); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/mod.rs b/cranelift/codegen/src/isa/legacy/x86/mod.rs deleted file mode 100644 index e61fda1931..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/mod.rs +++ /dev/null @@ -1,199 +0,0 @@ -//! x86 Instruction Set Architectures. 
- -mod abi; -mod binemit; -mod enc_tables; -mod registers; -pub mod settings; -#[cfg(feature = "unwind")] -pub mod unwind; - -use super::super::settings as shared_settings; -#[cfg(feature = "testing_hooks")] -use crate::binemit::CodeSink; -use crate::binemit::{emit_function, MemoryCodeSink}; -use crate::ir; -use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings}; -use crate::isa::Builder as IsaBuilder; -#[cfg(feature = "unwind")] -use crate::isa::{unwind::systemv::RegisterMappingError, RegUnit}; -use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa}; -use crate::regalloc; -use crate::result::CodegenResult; -use crate::timing; -use alloc::{borrow::Cow, boxed::Box, vec::Vec}; -use core::any::Any; -use core::fmt; -use core::hash::{Hash, Hasher}; -use target_lexicon::{PointerWidth, Triple}; - -#[allow(dead_code)] -struct Isa { - triple: Triple, - shared_flags: shared_settings::Flags, - isa_flags: settings::Flags, - cpumode: &'static [shared_enc_tables::Level1Entry], -} - -/// Get an ISA builder for creating x86 targets. 
-pub fn isa_builder(triple: Triple) -> IsaBuilder { - IsaBuilder { - triple, - setup: settings::builder(), - constructor: isa_constructor, - } -} - -fn isa_constructor( - triple: Triple, - shared_flags: shared_settings::Flags, - builder: shared_settings::Builder, -) -> Box { - let level1 = match triple.pointer_width().unwrap() { - PointerWidth::U16 => unimplemented!("x86-16"), - PointerWidth::U32 => &enc_tables::LEVEL1_I32[..], - PointerWidth::U64 => &enc_tables::LEVEL1_I64[..], - }; - - let isa_flags = settings::Flags::new(&shared_flags, builder); - - Box::new(Isa { - triple, - isa_flags, - shared_flags, - cpumode: level1, - }) -} - -impl TargetIsa for Isa { - fn name(&self) -> &'static str { - "x86" - } - - fn triple(&self) -> &Triple { - &self.triple - } - - fn flags(&self) -> &shared_settings::Flags { - &self.shared_flags - } - - fn isa_flags(&self) -> Vec { - self.isa_flags.iter().collect() - } - - fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) { - self.shared_flags.hash(&mut hasher); - self.isa_flags.hash(&mut hasher); - } - - fn uses_cpu_flags(&self) -> bool { - true - } - - fn uses_complex_addresses(&self) -> bool { - true - } - - fn register_info(&self) -> RegInfo { - registers::INFO.clone() - } - - #[cfg(feature = "unwind")] - fn map_dwarf_register(&self, reg: RegUnit) -> Result { - unwind::systemv::map_reg(self, reg).map(|r| r.0) - } - - fn encoding_info(&self) -> EncInfo { - enc_tables::INFO.clone() - } - - fn legal_encodings<'a>( - &'a self, - func: &'a ir::Function, - inst: &'a ir::InstructionData, - ctrl_typevar: ir::Type, - ) -> Encodings<'a> { - lookup_enclist( - ctrl_typevar, - inst, - func, - self.cpumode, - &enc_tables::LEVEL2[..], - &enc_tables::ENCLISTS[..], - &enc_tables::LEGALIZE_ACTIONS[..], - &enc_tables::RECIPE_PREDICATES[..], - &enc_tables::INST_PREDICATES[..], - self.isa_flags.predicate_view(), - ) - } - - fn legalize_signature(&self, sig: &mut Cow, current: bool) { - abi::legalize_signature( - sig, - &self.triple, - current, - 
&self.shared_flags, - &self.isa_flags, - ) - } - - fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass { - abi::regclass_for_abi_type(ty) - } - - fn allocatable_registers(&self, _func: &ir::Function) -> regalloc::RegisterSet { - abi::allocatable_registers(&self.triple, &self.shared_flags) - } - - #[cfg(feature = "testing_hooks")] - fn emit_inst( - &self, - func: &ir::Function, - inst: ir::Inst, - divert: &mut regalloc::RegDiversions, - sink: &mut dyn CodeSink, - ) { - binemit::emit_inst(func, inst, divert, sink, self) - } - - fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { - emit_function(func, binemit::emit_inst, sink, self) - } - - fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> { - let _tt = timing::prologue_epilogue(); - abi::prologue_epilogue(func, self) - } - - fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC { - ir::condcodes::IntCC::UnsignedLessThan - } - - fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC { - ir::condcodes::IntCC::UnsignedLessThan - } - - #[cfg(feature = "unwind")] - fn create_unwind_info( - &self, - func: &ir::Function, - ) -> CodegenResult> { - abi::create_unwind_info(func, self) - } - - #[cfg(feature = "unwind")] - fn create_systemv_cie(&self) -> Option { - Some(unwind::systemv::create_cie()) - } - - fn as_any(&self) -> &dyn Any { - self as &dyn Any - } -} - -impl fmt::Display for Isa { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}\n{}", self.shared_flags, self.isa_flags) - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/registers.rs b/cranelift/codegen/src/isa/legacy/x86/registers.rs deleted file mode 100644 index a7518b268b..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/registers.rs +++ /dev/null @@ -1,86 +0,0 @@ -//! x86 register descriptions. 
- -use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit}; - -include!(concat!(env!("OUT_DIR"), "/registers-x86.rs")); - -#[cfg(test)] -mod tests { - use super::*; - use crate::isa::RegUnit; - use alloc::string::{String, ToString}; - - #[test] - fn unit_encodings() { - fn gpr(unit: usize) -> Option { - Some(GPR.unit(unit)) - } - // The encoding of integer registers is not alphabetical. - assert_eq!(INFO.parse_regunit("rax"), gpr(0)); - assert_eq!(INFO.parse_regunit("rbx"), gpr(3)); - assert_eq!(INFO.parse_regunit("rcx"), gpr(1)); - assert_eq!(INFO.parse_regunit("rdx"), gpr(2)); - assert_eq!(INFO.parse_regunit("rsi"), gpr(6)); - assert_eq!(INFO.parse_regunit("rdi"), gpr(7)); - assert_eq!(INFO.parse_regunit("rbp"), gpr(5)); - assert_eq!(INFO.parse_regunit("rsp"), gpr(4)); - assert_eq!(INFO.parse_regunit("r8"), gpr(8)); - assert_eq!(INFO.parse_regunit("r15"), gpr(15)); - - fn fpr(unit: usize) -> Option { - Some(FPR.unit(unit)) - } - assert_eq!(INFO.parse_regunit("xmm0"), fpr(0)); - assert_eq!(INFO.parse_regunit("xmm15"), fpr(15)); - - // FIXME(#1306) Add these tests back in when FPR32 is re-added. - // fn fpr32(unit: usize) -> Option { - // Some(FPR32.unit(unit)) - // } - // assert_eq!(INFO.parse_regunit("xmm0"), fpr32(0)); - // assert_eq!(INFO.parse_regunit("xmm31"), fpr32(31)); - } - - #[test] - fn unit_names() { - fn gpr(ru: RegUnit) -> String { - INFO.display_regunit(GPR.first + ru).to_string() - } - assert_eq!(gpr(0), "%rax"); - assert_eq!(gpr(3), "%rbx"); - assert_eq!(gpr(1), "%rcx"); - assert_eq!(gpr(2), "%rdx"); - assert_eq!(gpr(6), "%rsi"); - assert_eq!(gpr(7), "%rdi"); - assert_eq!(gpr(5), "%rbp"); - assert_eq!(gpr(4), "%rsp"); - assert_eq!(gpr(8), "%r8"); - assert_eq!(gpr(15), "%r15"); - - fn fpr(ru: RegUnit) -> String { - INFO.display_regunit(FPR.first + ru).to_string() - } - assert_eq!(fpr(0), "%xmm0"); - assert_eq!(fpr(15), "%xmm15"); - - // FIXME(#1306) Add these tests back in when FPR32 is re-added. 
- // fn fpr32(ru: RegUnit) -> String { - // INFO.display_regunit(FPR32.first + ru).to_string() - // } - // assert_eq!(fpr32(0), "%xmm0"); - // assert_eq!(fpr32(31), "%xmm31"); - } - - #[test] - fn regclasses() { - assert_eq!(GPR.intersect_index(GPR), Some(GPR.into())); - assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into())); - assert_eq!(GPR.intersect_index(FPR), None); - assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into())); - assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into())); - assert_eq!(ABCD.intersect_index(FPR), None); - assert_eq!(FPR.intersect_index(FPR), Some(FPR.into())); - assert_eq!(FPR.intersect_index(GPR), None); - assert_eq!(FPR.intersect_index(ABCD), None); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/settings.rs b/cranelift/codegen/src/isa/legacy/x86/settings.rs deleted file mode 100644 index f13431c1a2..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/settings.rs +++ /dev/null @@ -1,52 +0,0 @@ -//! x86 Settings. - -use crate::settings::{self, detail, Builder, Value}; -use core::fmt; - -// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a -// public `Flags` struct with an impl for all of the settings defined in -// `cranelift-codegen/meta/src/isa/x86/settings.rs`. -include!(concat!(env!("OUT_DIR"), "/settings-x86.rs")); - -#[cfg(test)] -mod tests { - use super::{builder, Flags}; - use crate::settings::{self, Configurable}; - - #[test] - fn presets() { - let shared = settings::Flags::new(settings::builder()); - - // Nehalem has SSE4.1 but not BMI1. 
- let mut b0 = builder(); - b0.enable("nehalem").unwrap(); - let f0 = Flags::new(&shared, b0); - assert_eq!(f0.has_sse41(), true); - assert_eq!(f0.has_bmi1(), false); - - let mut b1 = builder(); - b1.enable("haswell").unwrap(); - let f1 = Flags::new(&shared, b1); - assert_eq!(f1.has_sse41(), true); - assert_eq!(f1.has_bmi1(), true); - } - #[test] - fn display_presets() { - // Spot check that the flags Display impl does not cause a panic - let shared = settings::Flags::new(settings::builder()); - - let b0 = builder(); - let f0 = Flags::new(&shared, b0); - let _ = format!("{}", f0); - - let mut b1 = builder(); - b1.enable("nehalem").unwrap(); - let f1 = Flags::new(&shared, b1); - let _ = format!("{}", f1); - - let mut b2 = builder(); - b2.enable("haswell").unwrap(); - let f2 = Flags::new(&shared, b2); - let _ = format!("{}", f2); - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind.rs b/cranelift/codegen/src/isa/legacy/x86/unwind.rs deleted file mode 100644 index 2eed8b74e4..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/unwind.rs +++ /dev/null @@ -1,531 +0,0 @@ -//! Module for x86 unwind generation for supported ABIs. - -pub mod systemv; -pub mod winx64; - -use crate::ir::{Function, InstructionData, Opcode, ValueLoc}; -use crate::isa::x86::registers::{FPR, RU}; -use crate::isa::{RegUnit, TargetIsa}; -use crate::result::CodegenResult; -use alloc::vec::Vec; -use std::collections::HashMap; - -use crate::isa::unwind::input::{UnwindCode, UnwindInfo}; - -pub(crate) fn create_unwind_info( - func: &Function, - isa: &dyn TargetIsa, -) -> CodegenResult>> { - // Find last block based on max offset. - let last_block = func - .layout - .blocks() - .max_by_key(|b| func.offsets[*b]) - .expect("at least a block"); - // Find last instruction offset + size, and make it function size. 
- let function_size = func - .inst_offsets(last_block, &isa.encoding_info()) - .fold(0, |_, (offset, _, size)| offset + size); - - let entry_block = func.layout.entry_block().expect("missing entry block"); - let prologue_end = func.prologue_end.unwrap(); - let epilogues_start = func - .epilogues_start - .iter() - .map(|(i, b)| (*b, *i)) - .collect::>(); - - let word_size = isa.pointer_bytes(); - - let mut stack_size = None; - let mut prologue_size = 0; - let mut prologue_unwind_codes = Vec::new(); - let mut epilogues_unwind_codes = Vec::new(); - let mut frame_register: Option = None; - - // Process only entry block and blocks with epilogues. - let mut blocks = func - .epilogues_start - .iter() - .map(|(_, b)| *b) - .collect::>(); - if !blocks.contains(&entry_block) { - blocks.push(entry_block); - } - blocks.sort_by_key(|b| func.offsets[*b]); - - for block in blocks.iter() { - let mut in_prologue = block == &entry_block; - let mut in_epilogue = false; - let mut epilogue_pop_offsets = Vec::new(); - - let epilogue_start = epilogues_start.get(block); - let is_last_block = block == &last_block; - - for (offset, inst, size) in func.inst_offsets(*block, &isa.encoding_info()) { - let offset = offset + size; - - let unwind_codes; - if in_prologue { - // Check for prologue end (inclusive) - if prologue_end == inst { - in_prologue = false; - } - prologue_size += size; - unwind_codes = &mut prologue_unwind_codes; - } else if !in_epilogue && epilogue_start == Some(&inst) { - // Now in an epilogue, emit a remember state instruction if not last block - in_epilogue = true; - - epilogues_unwind_codes.push(Vec::new()); - unwind_codes = epilogues_unwind_codes.last_mut().unwrap(); - - if !is_last_block { - unwind_codes.push((offset, UnwindCode::RememberState)); - } - } else if in_epilogue { - unwind_codes = epilogues_unwind_codes.last_mut().unwrap(); - } else { - // Ignore normal instructions - continue; - } - - match func.dfg[inst] { - InstructionData::Unary { opcode, arg } => { - 
match opcode { - Opcode::X86Push => { - let reg = func.locations[arg].unwrap_reg(); - unwind_codes.push(( - offset, - UnwindCode::StackAlloc { - size: word_size.into(), - }, - )); - unwind_codes.push(( - offset, - UnwindCode::SaveRegister { - reg, - stack_offset: 0, - }, - )); - } - Opcode::AdjustSpDown => { - let stack_size = - stack_size.expect("expected a previous stack size instruction"); - - // This is used when calling a stack check function - // We need to track the assignment to RAX which has the size of the stack - unwind_codes - .push((offset, UnwindCode::StackAlloc { size: stack_size })); - } - _ => {} - } - } - InstructionData::UnaryImm { opcode, imm } => { - match opcode { - Opcode::Iconst => { - let imm: i64 = imm.into(); - assert!(imm <= core::u32::MAX as i64); - assert!(stack_size.is_none()); - - // This instruction should only appear in a prologue to pass an - // argument of the stack size to a stack check function. - // Record the stack size so we know what it is when we encounter the adjustment - // instruction (which will adjust via the register assigned to this instruction). - stack_size = Some(imm as u32); - } - Opcode::AdjustSpDownImm => { - let imm: i64 = imm.into(); - assert!(imm <= core::u32::MAX as i64); - - stack_size = Some(imm as u32); - - unwind_codes - .push((offset, UnwindCode::StackAlloc { size: imm as u32 })); - } - Opcode::AdjustSpUpImm => { - let imm: i64 = imm.into(); - assert!(imm <= core::u32::MAX as i64); - - stack_size = Some(imm as u32); - - unwind_codes - .push((offset, UnwindCode::StackDealloc { size: imm as u32 })); - } - _ => {} - } - } - InstructionData::Store { - opcode: Opcode::Store, - args: [arg1, arg2], - offset: stack_offset, - .. 
- } => { - if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) = - (func.locations[arg1], func.locations[arg2]) - { - // If this is a save of an FPR, record an unwind operation - // Note: the stack_offset here is relative to an adjusted SP - if dst == (RU::rsp as RegUnit) && FPR.contains(src) { - let stack_offset: i32 = stack_offset.into(); - unwind_codes.push(( - offset, - UnwindCode::SaveRegister { - reg: src, - stack_offset: stack_offset as u32, - }, - )); - } - } - } - InstructionData::CopySpecial { src, dst, .. } if frame_register.is_none() => { - // Check for change in CFA register (RSP is always the starting CFA) - if src == (RU::rsp as RegUnit) { - unwind_codes.push((offset, UnwindCode::SetFramePointer { reg: dst })); - frame_register = Some(dst); - } - } - InstructionData::NullAry { opcode } => match opcode { - Opcode::X86Pop => { - epilogue_pop_offsets.push(offset); - } - _ => {} - }, - InstructionData::MultiAry { opcode, .. } if in_epilogue => match opcode { - Opcode::Return => { - let args = func.dfg.inst_args(inst); - for (i, arg) in args.iter().rev().enumerate() { - // Only walk back the args for the pop instructions encountered - if i >= epilogue_pop_offsets.len() { - break; - } - - let offset = epilogue_pop_offsets[i]; - - let reg = func.locations[*arg].unwrap_reg(); - unwind_codes.push((offset, UnwindCode::RestoreRegister { reg })); - unwind_codes.push(( - offset, - UnwindCode::StackDealloc { - size: word_size.into(), - }, - )); - - if Some(reg) == frame_register { - unwind_codes.push((offset, UnwindCode::RestoreFramePointer)); - // Keep frame_register assigned for next epilogue. - } - } - epilogue_pop_offsets.clear(); - - // TODO ensure unwind codes sorted by offsets ? 
- - if !is_last_block { - unwind_codes.push((offset, UnwindCode::RestoreState)); - } - - in_epilogue = false; - } - _ => {} - }, - _ => {} - }; - } - } - - Ok(Some(UnwindInfo { - prologue_size, - prologue_unwind_codes, - epilogues_unwind_codes, - function_size, - word_size, - initial_sp_offset: word_size, - })) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cursor::{Cursor, FuncCursor}; - use crate::ir::{ - types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind, - }; - use crate::isa::{lookup_variant, BackendVariant, CallConv}; - use crate::settings::{builder, Flags}; - use crate::Context; - use std::str::FromStr; - use target_lexicon::triple; - - #[test] - fn test_small_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 9, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into(), - } - ), - (9, UnwindCode::StackAlloc { size: 64 }) - ], - epilogues_unwind_codes: vec![vec![ - (13, UnwindCode::StackDealloc { size: 64 }), - ( - 15, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (15, UnwindCode::StackDealloc { size: 8 }), - (15, UnwindCode::RestoreFramePointer) - ]], - function_size: 16, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - #[test] - fn test_medium_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - 
.expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 27, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into(), - } - ), - (27, UnwindCode::StackAlloc { size: 10000 }) - ], - epilogues_unwind_codes: vec![vec![ - (34, UnwindCode::StackDealloc { size: 10000 }), - ( - 36, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (36, UnwindCode::StackDealloc { size: 8 }), - (36, UnwindCode::RestoreFramePointer) - ]], - function_size: 37, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - #[test] - fn test_large_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 27, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into(), - } - ), - (27, UnwindCode::StackAlloc { size: 1000000 }) - ], - epilogues_unwind_codes: vec![vec![ - (34, 
UnwindCode::StackDealloc { size: 1000000 }), - ( - 36, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (36, UnwindCode::StackDealloc { size: 8 }), - (36, UnwindCode::RestoreFramePointer) - ]], - function_size: 37, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { - let mut func = - Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); - - let block0 = func.dfg.make_block(); - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().return_(&[]); - - if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); - } - - func - } - - #[test] - fn test_multi_return_func() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - prologue_size: 5, - prologue_unwind_codes: vec![ - (2, UnwindCode::StackAlloc { size: 8 }), - ( - 2, - UnwindCode::SaveRegister { - reg: RU::rbp.into(), - stack_offset: 0, - } - ), - ( - 5, - UnwindCode::SetFramePointer { - reg: RU::rbp.into() - } - ) - ], - epilogues_unwind_codes: vec![ - vec![ - (12, UnwindCode::RememberState), - ( - 12, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (12, UnwindCode::StackDealloc { size: 8 }), - (12, UnwindCode::RestoreFramePointer), - (13, UnwindCode::RestoreState) - ], - vec![ - ( - 15, - UnwindCode::RestoreRegister { - reg: RU::rbp.into() - } - ), - (15, UnwindCode::StackDealloc { size: 8 }), - (15, UnwindCode::RestoreFramePointer) - ] - ], - function_size: 16, - word_size: 8, - initial_sp_offset: 8, - } - ); - } - - fn 
create_multi_return_function(call_conv: CallConv) -> Function { - let mut sig = Signature::new(call_conv); - sig.params.push(AbiParam::new(types::I32)); - let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig); - - let block0 = func.dfg.make_block(); - let v0 = func.dfg.append_block_param(block0, types::I32); - let block1 = func.dfg.make_block(); - let block2 = func.dfg.make_block(); - - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().brnz(v0, block2, &[]); - pos.ins().jump(block1, &[]); - - pos.insert_block(block1); - pos.ins().return_(&[]); - - pos.insert_block(block2); - pos.ins().return_(&[]); - - func - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs b/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs deleted file mode 100644 index 31fc64c9fb..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs +++ /dev/null @@ -1,235 +0,0 @@ -//! Unwind information for System V ABI (x86-64). - -use crate::ir::Function; -use crate::isa::{ - unwind::systemv::{RegisterMappingError, UnwindInfo}, - RegUnit, TargetIsa, -}; -use crate::result::CodegenResult; -use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64}; - -/// Creates a new x86-64 common information entry (CIE). 
-pub fn create_cie() -> CommonInformationEntry { - use gimli::write::CallFrameInstruction; - - let mut entry = CommonInformationEntry::new( - Encoding { - address_size: 8, - format: Format::Dwarf32, - version: 1, - }, - 1, // Code alignment factor - -8, // Data alignment factor - X86_64::RA, - ); - - // Every frame will start with the call frame address (CFA) at RSP+8 - // It is +8 to account for the push of the return address by the call instruction - entry.add_instruction(CallFrameInstruction::Cfa(X86_64::RSP, 8)); - - // Every frame will start with the return address at RSP (CFA-8 = RSP+8-8 = RSP) - entry.add_instruction(CallFrameInstruction::Offset(X86_64::RA, -8)); - - entry -} - -/// Map Cranelift registers to their corresponding Gimli registers. -pub fn map_reg(isa: &dyn TargetIsa, reg: RegUnit) -> Result { - if isa.name() != "x86" || isa.pointer_bits() != 64 { - return Err(RegisterMappingError::UnsupportedArchitecture); - } - - // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow - const X86_GP_REG_MAP: [gimli::Register; 16] = [ - X86_64::RAX, - X86_64::RCX, - X86_64::RDX, - X86_64::RBX, - X86_64::RSP, - X86_64::RBP, - X86_64::RSI, - X86_64::RDI, - X86_64::R8, - X86_64::R9, - X86_64::R10, - X86_64::R11, - X86_64::R12, - X86_64::R13, - X86_64::R14, - X86_64::R15, - ]; - const X86_XMM_REG_MAP: [gimli::Register; 16] = [ - X86_64::XMM0, - X86_64::XMM1, - X86_64::XMM2, - X86_64::XMM3, - X86_64::XMM4, - X86_64::XMM5, - X86_64::XMM6, - X86_64::XMM7, - X86_64::XMM8, - X86_64::XMM9, - X86_64::XMM10, - X86_64::XMM11, - X86_64::XMM12, - X86_64::XMM13, - X86_64::XMM14, - X86_64::XMM15, - ]; - - let reg_info = isa.register_info(); - let bank = reg_info - .bank_containing_regunit(reg) - .ok_or_else(|| RegisterMappingError::MissingBank)?; - match bank.name { - "IntRegs" => { - // x86 GP registers have a weird mapping to DWARF registers, so we use a - // lookup table. 
- Ok(X86_GP_REG_MAP[(reg - bank.first_unit) as usize]) - } - "FloatRegs" => Ok(X86_XMM_REG_MAP[(reg - bank.first_unit) as usize]), - _ => Err(RegisterMappingError::UnsupportedRegisterBank(bank.name)), - } -} - -pub(crate) fn create_unwind_info( - func: &Function, - isa: &dyn TargetIsa, -) -> CodegenResult> { - // Only System V-like calling conventions are supported - match isa.unwind_info_kind() { - crate::machinst::UnwindInfoKind::SystemV => {} - _ => return Ok(None), - } - - if func.prologue_end.is_none() || isa.name() != "x86" || isa.pointer_bits() != 64 { - return Ok(None); - } - - let unwind = match super::create_unwind_info(func, isa)? { - Some(u) => u, - None => { - return Ok(None); - } - }; - - struct RegisterMapper<'a, 'b>(&'a (dyn TargetIsa + 'b)); - impl<'a, 'b> crate::isa::unwind::systemv::RegisterMapper for RegisterMapper<'a, 'b> { - fn map(&self, reg: RegUnit) -> Result { - Ok(map_reg(self.0, reg)?.0) - } - fn sp(&self) -> u16 { - X86_64::RSP.0 - } - fn fp(&self) -> Option { - Some(X86_64::RBP.0) - } - } - let map = RegisterMapper(isa); - - Ok(Some(UnwindInfo::build(unwind, &map)?)) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cursor::{Cursor, FuncCursor}; - use crate::ir::{ - types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind, - }; - use crate::isa::{lookup_variant, BackendVariant, CallConv}; - use crate::settings::{builder, Flags}; - use crate::Context; - use gimli::write::Address; - use std::str::FromStr; - use target_lexicon::triple; - - #[test] - fn test_simple_func() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::SystemV, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let fde = match isa - .create_unwind_info(&context.func) - .expect("can create unwind info") 
- { - Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { - info.to_fde(Address::Constant(1234)) - } - _ => panic!("expected unwind information"), - }; - - assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }"); - } - - fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { - let mut func = - Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); - - let block0 = func.dfg.make_block(); - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().return_(&[]); - - if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); - } - - func - } - - #[test] - fn test_multi_return_func() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV)); - - context.compile(&*isa).expect("expected compilation"); - - let fde = match isa - .create_unwind_info(&context.func) - .expect("can create unwind info") - { - Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { - info.to_fde(Address::Constant(4321)) - } - _ => panic!("expected unwind information"), - }; - - assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (12, RememberState), (12, SameValue(Register(6))), (12, Cfa(Register(7), 8)), (13, RestoreState), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }"); - } - - fn create_multi_return_function(call_conv: CallConv) -> Function { - let mut sig = Signature::new(call_conv); - sig.params.push(AbiParam::new(types::I32)); - let mut func = 
Function::with_name_signature(ExternalName::user(0, 0), sig); - - let block0 = func.dfg.make_block(); - let v0 = func.dfg.append_block_param(block0, types::I32); - let block1 = func.dfg.make_block(); - let block2 = func.dfg.make_block(); - - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().brnz(v0, block2, &[]); - pos.ins().jump(block1, &[]); - - pos.insert_block(block1); - pos.ins().return_(&[]); - - pos.insert_block(block2); - pos.ins().return_(&[]); - - func - } -} diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs b/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs deleted file mode 100644 index 33e5463bb8..0000000000 --- a/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs +++ /dev/null @@ -1,265 +0,0 @@ -//! Unwind information for Windows x64 ABI. - -use crate::ir::Function; -use crate::isa::x86::registers::{FPR, GPR}; -use crate::isa::{unwind::winx64::UnwindInfo, RegUnit, TargetIsa}; -use crate::result::CodegenResult; - -pub(crate) fn create_unwind_info( - func: &Function, - isa: &dyn TargetIsa, -) -> CodegenResult> { - // Only Windows fastcall is supported for unwind information - if !func.signature.call_conv.extends_windows_fastcall() || func.prologue_end.is_none() { - return Ok(None); - } - - let unwind = match super::create_unwind_info(func, isa)? 
{ - Some(u) => u, - None => { - return Ok(None); - } - }; - - Ok(Some(UnwindInfo::build::(unwind)?)) -} - -struct RegisterMapper; - -impl crate::isa::unwind::winx64::RegisterMapper for RegisterMapper { - fn map(reg: RegUnit) -> crate::isa::unwind::winx64::MappedRegister { - use crate::isa::unwind::winx64::MappedRegister; - if GPR.contains(reg) { - MappedRegister::Int(GPR.index_of(reg) as u8) - } else if FPR.contains(reg) { - MappedRegister::Xmm(reg as u8) - } else { - panic!() - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cursor::{Cursor, FuncCursor}; - use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind}; - use crate::isa::unwind::winx64::UnwindCode; - use crate::isa::x86::registers::RU; - use crate::isa::{lookup_variant, BackendVariant, CallConv}; - use crate::settings::{builder, Flags}; - use crate::Context; - use std::str::FromStr; - use target_lexicon::triple; - - #[test] - fn test_wrong_calling_convention() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function(CallConv::SystemV, None)); - - context.compile(&*isa).expect("expected compilation"); - - assert_eq!( - create_unwind_info(&context.func, &*isa).expect("can create unwind info"), - None - ); - } - - #[test] - fn test_small_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - flags: 0, - prologue_size: 9, - frame_register: None, - 
frame_register_offset: 0, - unwind_codes: vec![ - UnwindCode::PushRegister { - instruction_offset: 2, - reg: GPR.index_of(RU::rbp.into()) as u8 - }, - UnwindCode::StackAlloc { - instruction_offset: 9, - size: 64 - } - ] - } - ); - - assert_eq!(unwind.emit_size(), 8); - - let mut buf = [0u8; 8]; - unwind.emit(&mut buf); - - assert_eq!( - buf, - [ - 0x01, // Version and flags (version 1, no flags) - 0x09, // Prologue size - 0x02, // Unwind code count (1 for stack alloc, 1 for push reg) - 0x00, // Frame register + offset (no frame register) - 0x09, // Prolog offset - 0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes) - 0x02, // Prolog offset - 0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP) - ] - ); - } - - #[test] - fn test_medium_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - flags: 0, - prologue_size: 27, - frame_register: None, - frame_register_offset: 0, - unwind_codes: vec![ - UnwindCode::PushRegister { - instruction_offset: 2, - reg: GPR.index_of(RU::rbp.into()) as u8 - }, - UnwindCode::StackAlloc { - instruction_offset: 27, - size: 10000 - } - ] - } - ); - - assert_eq!(unwind.emit_size(), 12); - - let mut buf = [0u8; 12]; - unwind.emit(&mut buf); - - assert_eq!( - buf, - [ - 0x01, // Version and flags (version 1, no flags) - 0x1B, // Prologue size - 0x03, // Unwind code count (2 for stack alloc, 1 for push reg) - 0x00, // Frame register + offset (no frame register) - 0x1B, // Prolog offset - 0x01, // Operation 1 (large stack 
alloc), size is scaled 16-bits (info = 0) - 0xE2, // Low size byte - 0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes) - 0x02, // Prolog offset - 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) - 0x00, // Padding - 0x00, // Padding - ] - ); - } - - #[test] - fn test_large_alloc() { - let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy) - .expect("expect x86 ISA") - .finish(Flags::new(builder())); - - let mut context = Context::for_function(create_function( - CallConv::WindowsFastcall, - Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)), - )); - - context.compile(&*isa).expect("expected compilation"); - - let unwind = create_unwind_info(&context.func, &*isa) - .expect("can create unwind info") - .expect("expected unwind info"); - - assert_eq!( - unwind, - UnwindInfo { - flags: 0, - prologue_size: 27, - frame_register: None, - frame_register_offset: 0, - unwind_codes: vec![ - UnwindCode::PushRegister { - instruction_offset: 2, - reg: GPR.index_of(RU::rbp.into()) as u8 - }, - UnwindCode::StackAlloc { - instruction_offset: 27, - size: 1000000 - } - ] - } - ); - - assert_eq!(unwind.emit_size(), 12); - - let mut buf = [0u8; 12]; - unwind.emit(&mut buf); - - assert_eq!( - buf, - [ - 0x01, // Version and flags (version 1, no flags) - 0x1B, // Prologue size - 0x04, // Unwind code count (3 for stack alloc, 1 for push reg) - 0x00, // Frame register + offset (no frame register) - 0x1B, // Prolog offset - 0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1) - 0x40, // Byte 1 of size - 0x42, // Byte 2 of size - 0x0F, // Byte 3 of size - 0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes) - 0x02, // Prolog offset - 0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP) - ] - ); - } - - fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { - let mut func = - Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); - - let block0 = 
func.dfg.make_block(); - let mut pos = FuncCursor::new(&mut func); - pos.insert_block(block0); - pos.ins().return_(&[]); - - if let Some(stack_slot) = stack_slot { - func.stack_slots.push(stack_slot); - } - - func - } -} diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 3331534c49..92edfd744f 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -84,12 +84,9 @@ pub(crate) mod aarch64; #[cfg(feature = "s390x")] mod s390x; -#[cfg(any(feature = "x86", feature = "riscv"))] +#[cfg(feature = "riscv")] mod legacy; -#[cfg(feature = "x86")] -use legacy::x86; - #[cfg(feature = "riscv")] use legacy::riscv; @@ -120,49 +117,19 @@ macro_rules! isa_builder { }}; } -/// The "variant" for a given target. On one platform (x86-64), we have two -/// backends, the "old" and "new" one; the new one is the default if included -/// in the build configuration and not otherwise specified. -#[derive(Clone, Copy, Debug)] -pub enum BackendVariant { - /// Any backend available. - Any, - /// A "legacy" backend: one that operates using legalizations and encodings. - Legacy, - /// A backend built on `MachInst`s and the `VCode` framework. - MachInst, -} - -impl Default for BackendVariant { - fn default() -> Self { - BackendVariant::Any - } -} - /// Look for an ISA for the given `triple`, selecting the backend variant given /// by `variant` if available. -pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result { - match (triple.architecture, variant) { - (Architecture::Riscv32 { .. }, _) | (Architecture::Riscv64 { .. }, _) => { +pub fn lookup_variant(triple: Triple) -> Result { + match triple.architecture { + Architecture::Riscv32 { .. } | Architecture::Riscv64 { .. 
} => { isa_builder!(riscv, (feature = "riscv"), triple) } - (Architecture::X86_64, BackendVariant::Legacy) => { - isa_builder!(x86, (feature = "x86"), triple) - } - (Architecture::X86_64, BackendVariant::MachInst) => { + Architecture::X86_64 => { isa_builder!(x64, (feature = "x86"), triple) } - #[cfg(not(feature = "old-x86-backend"))] - (Architecture::X86_64, BackendVariant::Any) => { - isa_builder!(x64, (feature = "x86"), triple) - } - #[cfg(feature = "old-x86-backend")] - (Architecture::X86_64, BackendVariant::Any) => { - isa_builder!(x86, (feature = "x86"), triple) - } - (Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple), - (Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple), - (Architecture::S390x { .. }, _) => isa_builder!(s390x, (feature = "s390x"), triple), + Architecture::Arm { .. } => isa_builder!(arm32, (feature = "arm32"), triple), + Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple), + Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple), _ => Err(LookupError::Unsupported), } } @@ -170,7 +137,7 @@ pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result Result { - lookup_variant(triple, BackendVariant::Any) + lookup_variant(triple) } /// Look for a supported ISA with the given `name`. @@ -292,11 +259,6 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// Get the ISA-dependent flag values that were used to make this trait object. fn isa_flags(&self) -> Vec; - /// Get the variant of this ISA (Legacy or MachInst). - fn variant(&self) -> BackendVariant { - BackendVariant::Legacy - } - /// Hashes all flags, both ISA-independent and ISA-specific, into the /// specified hasher. 
fn hash_all_flags(&self, hasher: &mut dyn Hasher); diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index cd0db1ae34..c100e36031 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -2962,45 +2962,6 @@ fn lower_insn_to_regs>( | Opcode::IfcmpImm => { panic!("ALU+imm and ALU+carry ops should not appear here!"); } - - #[cfg(feature = "x86")] - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pblendw - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Vcvtudq2ps - | Opcode::X86Palignr - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => { - panic!("x86-specific opcode in supposedly arch-neutral IR!"); - } } Ok(()) diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index 9115db0671..7119d5b260 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -109,7 +109,6 @@ mod tests { use target_lexicon::triple; #[test] - #[cfg_attr(feature = "old-x86-backend", ignore)] fn test_simple_func() { let isa = lookup(triple!("x86_64")) .expect("expect x86 ISA") @@ -152,7 +151,6 @@ mod tests { } #[test] - #[cfg_attr(feature = "old-x86-backend", ignore)] fn test_multi_return_func() { let isa = lookup(triple!("x86_64")) .expect("expect x86 ISA") diff --git 
a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index da29a04314..9b51a27b07 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -6900,44 +6900,6 @@ fn lower_insn_to_regs>( panic!("Branch opcode reached non-branch lowering logic!"); } - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pblendw - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Palignr - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Vcvtudq2ps - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => { - panic!("x86-specific opcode in supposedly arch-neutral IR!"); - } - Opcode::Nop => { // Nothing. 
} diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 3b794a1e25..ee89ee35f0 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -21,9 +21,9 @@ use crate::ir::types::{I32, I64}; use crate::ir::{self, InstBuilder, MemFlags}; use crate::isa::TargetIsa; -#[cfg(any(feature = "x86", feature = "riscv"))] +#[cfg(feature = "riscv")] use crate::predicates; -#[cfg(any(feature = "x86", feature = "riscv"))] +#[cfg(feature = "riscv")] use alloc::vec::Vec; use crate::timing; diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs index c0bc76417f..b60bf8300a 100644 --- a/cranelift/codegen/src/machinst/adapter.rs +++ b/cranelift/codegen/src/machinst/adapter.rs @@ -3,7 +3,7 @@ use crate::binemit; use crate::ir; use crate::isa::{ - BackendVariant, EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa, + EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa, }; use crate::machinst::*; use crate::regalloc::RegisterSet; @@ -64,10 +64,6 @@ impl TargetIsa for TargetIsaAdapter { self.backend.isa_flags() } - fn variant(&self) -> BackendVariant { - BackendVariant::MachInst - } - fn hash_all_flags(&self, hasher: &mut dyn Hasher) { self.backend.hash_all_flags(hasher); } diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif index f5789b67ee..902df05a08 100644 --- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif +++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %amode_add(i64, i64) -> i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif index cbd265a9ea..64049860dd 100644 --- a/cranelift/filetests/filetests/isa/x64/b1.clif +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -1,5 +1,5 
@@ test compile -target x86_64 machinst +target x86_64 function %f0(b1, i32, i32) -> i32 { ; check: pushq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif index 8b43d70c7c..88b605d3d7 100644 --- a/cranelift/filetests/filetests/isa/x64/basic.clif +++ b/cranelift/filetests/filetests/isa/x64/basic.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index 99aec088ac..edcf36ec1a 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(i32, i32) -> i32 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif index 8f619e2aa7..e2f0d8e81a 100644 --- a/cranelift/filetests/filetests/isa/x64/call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 ;; system_v has first param in %rdi, fascall in %rcx function %one_arg(i32) system_v { diff --git a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif index f36caed88a..627e4700bb 100644 --- a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst has_lzcnt +target x86_64 has_lzcnt function %clz(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index 16c788ed84..853fd0d5f9 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ 
b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(i64, i64) -> i64, i64 { block0(v0: i64, v1: i64): diff --git a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif index 5931451e11..42f82c653f 100644 --- a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif +++ b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst has_bmi1 +target x86_64 has_bmi1 function %ctz(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index 3984aba42f..5a366914a2 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -1,6 +1,6 @@ test compile set avoid_div_traps=false -target x86_64 machinst +target x86_64 ;; We should get the checked-div/rem sequence (`srem` pseudoinst below) even ;; when `avoid_div_traps` above is false (i.e. 
even when the host is normally diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index fc8d3a801d..99397044d9 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -1,7 +1,7 @@ test compile set enable_llvm_abi_extensions=true set unwind_info=true -target x86_64 machinst +target x86_64 function %f0(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): @@ -206,7 +206,7 @@ block0(v0: i64): v18 = load.f64 v0+136 v19 = load.f64 v0+144 v20 = load.f64 v0+152 - + v21 = fadd.f64 v1, v2 v22 = fadd.f64 v3, v4 v23 = fadd.f64 v5, v6 diff --git a/cranelift/filetests/filetests/isa/x64/floating-point.clif b/cranelift/filetests/filetests/isa/x64/floating-point.clif index c1e30a3b19..2f2552aec1 100644 --- a/cranelift/filetests/filetests/isa/x64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/x64/floating-point.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f(f64) -> f64 { block0(v0: f64): diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index 2c77bc7ec2..b8c27f422e 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f(i32, i64 vmctx) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 61783e366d..75013a8170 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -1,6 +1,6 @@ test compile set enable_llvm_abi_extensions=true -target x86_64 machinst +target x86_64 function %f0(i128, i128) -> i128 { ; check: pushq %rbp @@ -190,7 +190,7 @@ block0(v0: i128, v1: i128): ; nextln: orq %rax, %r8 ; nextln: andq $$1, %r8 ; 
nextln: setnz %r8b - + v4 = icmp slt v0, v1 ; check: cmpq %rcx, %rsi ; nextln: setl %r9b @@ -201,7 +201,7 @@ block0(v0: i128, v1: i128): ; nextln: orq %r9, %r10 ; nextln: andq $$1, %r10 ; nextln: setnz %r9b - + v5 = icmp sle v0, v1 ; check: cmpq %rcx, %rsi ; nextln: setl %r10b @@ -212,7 +212,7 @@ block0(v0: i128, v1: i128): ; nextln: orq %r10, %r11 ; nextln: andq $$1, %r11 ; nextln: setnz %r10b - + v6 = icmp sgt v0, v1 ; check: cmpq %rcx, %rsi ; nextln: setnle %r11b @@ -307,7 +307,7 @@ block0(v0: i128): ; nextln: setz %sil ; nextln: andb %dil, %sil ; nextln: jnz label1; j label2 - + jump block2 block1: @@ -725,7 +725,7 @@ block2(v6: i128): ; nextln: movq %rbp, %rsp ; nextln: popq %rbp ; nextln: ret - + } function %f24(i128, i128, i64, i128, i128, i128) -> i128 { @@ -1106,4 +1106,4 @@ block0(v0: i128, v1: i128): ; nextln: movq %rcx, %rdx ; nextln: movq %rbp, %rsp ; nextln: popq %rbp -; nextln: ret \ No newline at end of file +; nextln: ret diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif index a4069b20ca..85c26dec3e 100644 --- a/cranelift/filetests/filetests/isa/x64/load-op.clif +++ b/cranelift/filetests/filetests/isa/x64/load-op.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %add_from_mem_u32_1(i64, i32) -> i32 { block0(v0: i64, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif index 5b23afb8d3..533eb5341b 100644 --- a/cranelift/filetests/filetests/isa/x64/move-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %move_registers(i32x4) -> b8x16 { block0(v0: i32x4): diff --git a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif index 2049f53962..8326e681b0 100644 --- 
a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst has_popcnt has_sse42 +target x86_64 has_popcnt has_sse42 function %popcnt(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index df68f6b4b7..e3f8d0c49d 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %popcnt64(i64) -> i64 { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/probestack.clif b/cranelift/filetests/filetests/isa/x64/probestack.clif index 8e8b424c70..40944a797c 100644 --- a/cranelift/filetests/filetests/isa/x64/probestack.clif +++ b/cranelift/filetests/filetests/isa/x64/probestack.clif @@ -1,6 +1,6 @@ test compile set enable_probestack=true -target x86_64 machinst +target x86_64 function %f1() -> i64 { ss0 = explicit_slot 100000 diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif index af6996f85f..41c8a67fb1 100644 --- a/cranelift/filetests/filetests/isa/x64/select-i128.clif +++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif @@ -1,6 +1,6 @@ test compile set enable_llvm_abi_extensions=true -target x86_64 machinst +target x86_64 function %f0(i32, i128, i128) -> i128 { ; check: pushq %rbp @@ -24,6 +24,6 @@ block0(v0: i32, v1: i128, v2: i128): ; nextln: movq %rbp, %rsp ; nextln: popq %rbp ; nextln: ret - + } diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 52761b1ed0..b7251f9fe1 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -1,6 +1,6 @@ 
test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %bitselect_i16x8() -> i16x8 { block0: diff --git a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif index b50ff6328d..7e3dee77f0 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 { block0(v0: i32x4, v1: i32x4): diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif index 2f6a8c7dfd..38894f6086 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst has_ssse3 has_sse41 +target x86_64 has_ssse3 has_sse41 ;; shuffle diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif index 72249faaef..29f4b2cdb0 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif @@ -1,6 +1,6 @@ test compile set enable_simd -target x86_64 machinst skylake +target x86_64 skylake function %bnot_b32x4(b32x4) -> b32x4 { block0(v0: b32x4): diff --git a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif index 31edd7bdca..c20f816fc2 100644 --- a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif +++ b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target 
x86_64 ;; The goal of this test is to ensure that stack spills of an integer value, ;; which width is less than the machine word's size, cause the full word to be diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif index e9001c5393..23fbb731b8 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function u0:0(i64 sarg(64)) -> i8 system_v { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif index ee59ff4963..90a6d6fbe4 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %f0(i64 sret) { block0(v0: i64): diff --git a/cranelift/filetests/filetests/isa/x64/tls_elf.clif b/cranelift/filetests/filetests/isa/x64/tls_elf.clif index 37a4698619..71bf7dada9 100644 --- a/cranelift/filetests/filetests/isa/x64/tls_elf.clif +++ b/cranelift/filetests/filetests/isa/x64/tls_elf.clif @@ -1,6 +1,6 @@ test compile set tls_model=elf_gd -target x86_64 machinst +target x86_64 function u0:0(i32) -> i64 { gv0 = symbol colocated tls u1:0 diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif index ef43c3dd03..32c856c419 100644 --- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 function %elide_uextend_add(i32, i32) -> i64 { block0(v0: i32, v1: i32): diff --git a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif index 
5ddd4b20d3..6548930328 100644 --- a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif +++ b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif @@ -1,5 +1,5 @@ test compile -target x86_64 machinst +target x86_64 ;; From: https://github.com/bytecodealliance/wasmtime/issues/2670 diff --git a/cranelift/filetests/filetests/isa/x86/abcd.clif b/cranelift/filetests/filetests/isa/x86/abcd.clif deleted file mode 100644 index bfea325055..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abcd.clif +++ /dev/null @@ -1,13 +0,0 @@ -test regalloc -target i686 legacy - -; %rdi can't be used in a movsbl instruction, so test that the register -; allocator can move it to a register that can be. - -function %test(i32 [%rdi]) -> i32 system_v { -block0(v0: i32 [%rdi]): - v1 = ireduce.i8 v0 - v2 = sextend.i32 v1 - return v2 -} -; check: regmove v1, %rdi -> %rax diff --git a/cranelift/filetests/filetests/isa/x86/abi-bool.clif b/cranelift/filetests/filetests/isa/x86/abi-bool.clif deleted file mode 100644 index 5286de3c18..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abi-bool.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -target x86_64 legacy haswell - -function %foo(i64, i64, i64, i32) -> b1 system_v { -block3(v0: i64, v1: i64, v2: i64, v3: i32): - v5 = icmp ne v2, v2 - v8 = iconst.i64 0 - jump block2(v8, v3, v5) - -block2(v10: i64, v30: i32, v37: b1): - v18 = load.i32 notrap aligned v2 - v27 = iadd.i64 v10, v10 - v31 = icmp eq v30, v30 - brz v31, block2(v27, v30, v37) - jump block0(v37) - -block0(v35: b1): - return v35 -} diff --git a/cranelift/filetests/filetests/isa/x86/abi32.clif b/cranelift/filetests/filetests/isa/x86/abi32.clif deleted file mode 100644 index 8ca530a695..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abi32.clif +++ /dev/null @@ -1,20 +0,0 @@ -; Test the legalization of function signatures. 
-test legalizer -target i686 legacy - -; regex: V=v\d+ - -function %f() { - sig0 = (i32) -> i32 system_v - ; check: sig0 = (i32 [0]) -> i32 [%rax] system_v - - sig1 = (i64) -> b1 system_v - ; check: sig1 = (i32 [0], i32 [4]) -> b1 [%rax] system_v - - sig2 = (f32, i64) -> f64 system_v - ; check: sig2 = (f32 [0], i32 [4], i32 [8]) -> f64 [%xmm0] system_v - -block0: - return -} - diff --git a/cranelift/filetests/filetests/isa/x86/abi64.clif b/cranelift/filetests/filetests/isa/x86/abi64.clif deleted file mode 100644 index 0da2aad424..0000000000 --- a/cranelift/filetests/filetests/isa/x86/abi64.clif +++ /dev/null @@ -1,37 +0,0 @@ -; Test the legalization of function signatures. -test legalizer -target x86_64 legacy - -; regex: V=v\d+ - -function %f() { - sig0 = (i32) -> i32 system_v - ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] system_v - - sig1 = (i64) -> b1 system_v - ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] system_v - - sig2 = (f32, i64) -> f64 system_v - ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] system_v - - sig3 = () -> i128 system_v - ; check: sig3 = () -> i64 [%rax], i64 [%rdx] system_v - - sig4 = (i128) -> i128 system_v - ; check: sig4 = (i64 [%rdi], i64 [%rsi]) -> i64 [%rax], i64 [%rdx] system_v - -block0: - return -} - -function %pass_stack_int64(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v { - sig0 = (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v - fn0 = u0:0 sig0 - -block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64): - call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) - jump block1 - -block1: - return -} diff --git 
a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif deleted file mode 100644 index ca0ace1342..0000000000 --- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif +++ /dev/null @@ -1,25 +0,0 @@ -; binary emission of 32-bit code. -test binemit -set opt_level=speed_and_size -set emit_all_ones_funcaddrs -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs32.clif | llvm-mc -show-encoding -triple=i386 -; - -; Tests from binary32.clif affected by emit_all_ones_funcaddrs. -function %I32() { - sig0 = () - fn0 = %foo() - -block0: - - ; asm: movl $-1, %ecx - [-,%rcx] v400 = func_addr.i32 fn0 ; bin: b9 Abs4(%foo) ffffffff - ; asm: movl $-1, %esi - [-,%rsi] v401 = func_addr.i32 fn0 ; bin: be Abs4(%foo) ffffffff - - return ; bin: c3 -} diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif deleted file mode 100644 index 7fbb670df2..0000000000 --- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif +++ /dev/null @@ -1,27 +0,0 @@ -; binary emission of 64-bit code. -test binemit -set opt_level=speed_and_size -set emit_all_ones_funcaddrs -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs64.clif | llvm-mc -show-encoding -triple=x86_64 -; - -; Tests from binary64.clif affected by emit_all_ones_funcaddrs. 
-function %I64() { - sig0 = () - fn0 = %foo() - -block0: - - ; asm: movabsq $-1, %rcx - [-,%rcx] v400 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) ffffffffffffffff - ; asm: movabsq $-1, %rsi - [-,%rsi] v401 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) ffffffffffffffff - ; asm: movabsq $-1, %r10 - [-,%r10] v402 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) ffffffffffffffff - - return ; bin: c3 -} diff --git a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif b/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif deleted file mode 100644 index d2713829cd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -set enable_probestack=false -target i686 legacy - -function u0:0(i32 vmctx) baldrdash_system_v { - sig0 = (i32 vmctx, i32 sigid) baldrdash_system_v - -block0(v0: i32): - v2 = iconst.i32 0 - v8 = iconst.i32 0 - v9 = iconst.i32 0 - call_indirect sig0, v8(v9, v2) - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif deleted file mode 100644 index 9099315878..0000000000 --- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif +++ /dev/null @@ -1,92 +0,0 @@ -test compile -target x86_64 legacy baseline - - -; clz/ctz on 64 bit operands - -function %i64_clz(i64) -> i64 { -block0(v10: i64): - v11 = clz v10 - ; check: x86_bsr - ; check: selectif.i64 - return v11 -} - -function %i64_ctz(i64) -> i64 { -block1(v20: i64): - v21 = ctz v20 - ; check: x86_bsf - ; check: selectif.i64 - return v21 -} - - -; clz/ctz on 32 bit operands - -function %i32_clz(i32) -> i32 { -block0(v10: i32): - v11 = clz v10 - ; check: x86_bsr - ; check: selectif.i32 - return v11 -} - -function %i32_ctz(i32) -> i32 { -block1(v20: i32): - v21 = ctz v20 - ; check: x86_bsf - ; check: selectif.i32 - return v21 -} - - -; popcount on 64 bit operands - -function 
%i64_popcount(i64) -> i64 { -block0(v30: i64): - v31 = popcnt v30; - ; check: ushr_imm - ; check: iconst.i64 - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: iadd - ; check: iconst.i64 - ; check: band - ; check: iconst.i64 - ; check: imul - ; check: ushr_imm - return v31; -} - - -; popcount on 32 bit operands - -function %i32_popcount(i32) -> i32 { -block0(v40: i32): - v41 = popcnt v40; - ; check: ushr_imm - ; check: iconst.i32 - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: band - ; check: isub - ; check: ushr_imm - ; check: iadd - ; check: iconst.i32 - ; check: band - ; check: iconst.i32 - ; check: imul - ; check: ushr_imm - return v41; -} diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif deleted file mode 100644 index b2f36ff148..0000000000 --- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif +++ /dev/null @@ -1,87 +0,0 @@ -test binemit -set opt_level=speed_and_size -target x86_64 legacy baseline - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %Foo() { -block0: - ; 64-bit wide bsf - - [-,%r11] v10 = iconst.i64 0x1234 - ; asm: bsfq %r11, %rcx - [-,%rcx,%rflags] v11, v12 = x86_bsf v10 ; bin: 49 0f bc cb - - [-,%rdx] v14 = iconst.i64 0x5678 - ; asm: bsfq %rdx, %r12 - [-,%r12,%rflags] v15, v16 = x86_bsf v14 ; bin: 4c 0f bc e2 - - ; asm: bsfq %rdx, %rdi - [-,%rdi,%rflags] v17, v18 = x86_bsf v14 ; bin: 48 0f bc fa - - - ; 32-bit wide bsf - - [-,%r11] v20 = iconst.i32 0x1234 - ; asm: bsfl %r11d, %ecx - [-,%rcx,%rflags] v21, v22 = x86_bsf v20 ; bin: 41 0f bc cb - - [-,%rdx] v24 = 
iconst.i32 0x5678 - ; asm: bsfl %edx, %r12d - [-,%r12,%rflags] v25, v26 = x86_bsf v24 ; bin: 44 0f bc e2 - - ; asm: bsfl %edx, %esi - [-,%rsi,%rflags] v27, v28 = x86_bsf v24 ; bin: 0f bc f2 - - - ; 64-bit wide bsr - - [-,%r11] v30 = iconst.i64 0x1234 - ; asm: bsrq %r11, %rcx - [-,%rcx,%rflags] v31, v32 = x86_bsr v30 ; bin: 49 0f bd cb - - [-,%rdx] v34 = iconst.i64 0x5678 - ; asm: bsrq %rdx, %r12 - [-,%r12,%rflags] v35, v36 = x86_bsr v34 ; bin: 4c 0f bd e2 - - ; asm: bsrq %rdx, %rdi - [-,%rdi,%rflags] v37, v38 = x86_bsr v34 ; bin: 48 0f bd fa - - - ; 32-bit wide bsr - - [-,%r11] v40 = iconst.i32 0x1234 - ; asm: bsrl %r11d, %ecx - [-,%rcx,%rflags] v41, v42 = x86_bsr v40 ; bin: 41 0f bd cb - - [-,%rdx] v44 = iconst.i32 0x5678 - ; asm: bsrl %edx, %r12d - [-,%r12,%rflags] v45, v46 = x86_bsr v44 ; bin: 44 0f bd e2 - - ; asm: bsrl %edx, %esi - [-,%rsi,%rflags] v47, v48 = x86_bsr v44 ; bin: 0f bd f2 - - - ; 64-bit wide cmov - - ; asm: cmoveq %r11, %rdx - [-,%rdx] v51 = selectif.i64 eq v48, v30, v34 ; bin: 49 0f 44 d3 - - ; asm: cmoveq %rdi, %rdx - [-,%rdx] v52 = selectif.i64 eq v48, v37, v34 ; bin: 48 0f 44 d7 - - - ; 32-bit wide cmov - - ; asm: cmovnel %r11d, %edx - [-,%rdx] v60 = selectif.i32 ne v48, v40, v44 ; bin: 41 0f 45 d3 - - ; asm: cmovlel %esi, %edx - [-,%rdx] v61 = selectif.i32 sle v48, v27, v44 ; bin: 0f 4e d6 - - - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/binary32-float.clif b/cranelift/filetests/filetests/isa/x86/binary32-float.clif deleted file mode 100644 index cfac85f7b2..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary32-float.clif +++ /dev/null @@ -1,557 +0,0 @@ -; Binary emission of 32-bit floating point code. 
-test binemit -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32-float.clif | llvm-mc -show-encoding -triple=i386 -; - -function %F32() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%rcx] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - - ; asm: cvtsi2ss %ecx, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 0f 2a e9 - ; asm: cvtsi2ss %esi, %xmm2 - [-,%xmm2] v11 = fcvt_from_sint.f32 v1 ; bin: f3 0f 2a d6 - - ; asm: cvtss2sd %xmm2, %xmm5 - [-,%xmm5] v12 = fpromote.f64 v11 ; bin: f3 0f 5a ea - ; asm: cvtss2sd %xmm5, %xmm2 - [-,%xmm2] v13 = fpromote.f64 v10 ; bin: f3 0f 5a d5 - - ; asm: movd %ecx, %xmm5 - [-,%xmm5] v14 = bitcast.f32 v0 ; bin: 66 0f 6e e9 - ; asm: movd %esi, %xmm2 - [-,%xmm2] v15 = bitcast.f32 v1 ; bin: 66 0f 6e d6 - - ; asm: movd %xmm5, %ecx - [-,%rcx] v16 = bitcast.i32 v10 ; bin: 66 0f 7e e9 - ; asm: movd %xmm2, %esi - [-,%rsi] v17 = bitcast.i32 v11 ; bin: 66 0f 7e d6 - - ; asm: movaps %xmm2, %xmm5 - [-,%xmm5] v18 = copy v11 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - [-,%xmm2] v19 = copy v10 ; bin: 0f 28 d5 - - ; asm: movaps %xmm2, %xmm5 - regmove v19, %xmm2 -> %xmm5 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - regmove v19, %xmm5 -> %xmm2 ; bin: 0f 28 d5 - - ; Binary arithmetic. 
- - ; asm: addss %xmm2, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f3 0f 58 ea - ; asm: addss %xmm5, %xmm2 - [-,%xmm2] v21 = fadd v11, v10 ; bin: f3 0f 58 d5 - - ; asm: subss %xmm2, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f3 0f 5c ea - ; asm: subss %xmm5, %xmm2 - [-,%xmm2] v23 = fsub v11, v10 ; bin: f3 0f 5c d5 - - ; asm: mulss %xmm2, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f3 0f 59 ea - ; asm: mulss %xmm5, %xmm2 - [-,%xmm2] v25 = fmul v11, v10 ; bin: f3 0f 59 d5 - - ; asm: divss %xmm2, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 0f 5e ea - ; asm: divss %xmm5, %xmm2 - [-,%xmm2] v27 = fdiv v11, v10 ; bin: f3 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. - - ; asm: andps %xmm2, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea - ; asm: andps %xmm5, %xmm2 - [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 - - ; asm: andnps %xmm2, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 0f 55 ea - ; asm: andnps %xmm5, %xmm2 - [-,%xmm2] v33 = band_not v10, v11 ; bin: 0f 55 d5 - - ; asm: orps %xmm2, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea - ; asm: orps %xmm5, %xmm2 - [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 - - ; asm: xorps %xmm2, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea - ; asm: xorps %xmm5, %xmm2 - [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 - - ; Convert float to int. (No i64 dest on i386). - - ; asm: cvttss2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd - ; asm: cvttss2si %xmm2, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 0f 2c f2 - - ; Min/max. - - ; asm: minss %xmm2, %xmm5 - [-,%xmm5] v42 = x86_fmin v10, v11 ; bin: f3 0f 5d ea - ; asm: minss %xmm5, %xmm2 - [-,%xmm2] v43 = x86_fmin v11, v10 ; bin: f3 0f 5d d5 - ; asm: maxss %xmm2, %xmm5 - [-,%xmm5] v44 = x86_fmax v10, v11 ; bin: f3 0f 5f ea - ; asm: maxss %xmm5, %xmm2 - [-,%xmm2] v45 = x86_fmax v11, v10 ; bin: f3 0f 5f d5 - - ; Unary arithmetic. 
- - ; asm: sqrtss %xmm5, %xmm2 - [-,%xmm2] v50 = sqrt v10 ; bin: f3 0f 51 d5 - ; asm: sqrtss %xmm2, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f3 0f 51 ea - - ; asm: roundss $0, %xmm5, %xmm4 - [-,%xmm4] v52 = nearest v10 ; bin: 66 0f 3a 0a e5 00 - ; asm: roundss $0, %xmm2, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 0f 3a 0a ea 00 - ; asm: roundss $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00 - - ; asm: roundss $1, %xmm5, %xmm4 - [-,%xmm4] v55 = floor v10 ; bin: 66 0f 3a 0a e5 01 - ; asm: roundss $1, %xmm2, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 0f 3a 0a ea 01 - ; asm: roundss $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01 - - ; asm: roundss $2, %xmm5, %xmm4 - [-,%xmm4] v58 = ceil v10 ; bin: 66 0f 3a 0a e5 02 - ; asm: roundss $2, %xmm2, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 0f 3a 0a ea 02 - ; asm: roundss $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02 - - ; asm: roundss $3, %xmm5, %xmm4 - [-,%xmm4] v61 = trunc v10 ; bin: 66 0f 3a 0a e5 03 - ; asm: roundss $3, %xmm2, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 0f 3a 0a ea 03 - ; asm: roundss $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a d5 03 - - ; Load/Store - - ; asm: movss (%ecx), %xmm5 - [-,%xmm5] v100 = load.f32 v0 ; bin: heap_oob f3 0f 10 29 - ; asm: movss (%esi), %xmm2 - [-,%xmm2] v101 = load.f32 v1 ; bin: heap_oob f3 0f 10 16 - ; asm: movss 50(%ecx), %xmm5 - [-,%xmm5] v110 = load.f32 v0+50 ; bin: heap_oob f3 0f 10 69 32 - ; asm: movss -50(%esi), %xmm2 - [-,%xmm2] v111 = load.f32 v1-50 ; bin: heap_oob f3 0f 10 56 ce - ; asm: movss 10000(%ecx), %xmm5 - [-,%xmm5] v120 = load.f32 v0+10000 ; bin: heap_oob f3 0f 10 a9 00002710 - ; asm: movss -10000(%esi), %xmm2 - [-,%xmm2] v121 = load.f32 v1-10000 ; bin: heap_oob f3 0f 10 96 ffffd8f0 - - ; asm: movss %xmm5, (%ecx) - [-] store.f32 v100, v0 ; bin: heap_oob f3 0f 11 29 - ; asm: movss %xmm2, (%esi) - [-] store.f32 v101, v1 ; bin: heap_oob f3 0f 11 16 - ; asm: movss %xmm5, 
50(%ecx) - [-] store.f32 v100, v0+50 ; bin: heap_oob f3 0f 11 69 32 - ; asm: movss %xmm2, -50(%esi) - [-] store.f32 v101, v1-50 ; bin: heap_oob f3 0f 11 56 ce - ; asm: movss %xmm5, 10000(%ecx) - [-] store.f32 v100, v0+10000 ; bin: heap_oob f3 0f 11 a9 00002710 - ; asm: movss %xmm2, -10000(%esi) - [-] store.f32 v101, v1-10000 ; bin: heap_oob f3 0f 11 96 ffffd8f0 - - ; Spill / Fill. - - ; asm: movss %xmm5, 1032(%esp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss %xmm2, 1032(%esp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f3 0f 11 94 24 00000408 - - ; asm: movss 1032(%esp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408 - ; asm: movss 1032(%esp), %xmm2 - [-,%xmm2] v211 = fill v201 ; bin: f3 0f 10 94 24 00000408 - - ; asm: movss %xmm5, 1032(%esp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss 1032(%esp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomiss %xmm2, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 0f 2e ea 0f 9b c3 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 0f 2e d5 0f 9a c3 - ; asm: ucomiss %xmm2, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 0f 2e ea 0f 95 c2 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 0f 2e d5 0f 94 c2 - ; asm: ucomiss %xmm2, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 0f 2e ea 0f 97 c3 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 0f 2e d5 0f 93 c3 - ; asm: ucomiss %xmm2, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 0f 2e ea 0f 92 c2 - ; asm: ucomiss %xmm5, %xmm2 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 0f 2e d5 0f 96 c2 - - ; asm: ucomiss %xmm2, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 0f 2e ea - ; asm: ucomiss %xmm2, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 0f 2e d5 - ; asm: ucomiss %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed - - ; Load/Store Complex - - [-,%rax] v350 = iconst.i32 1 - [-,%rbx] v351 = iconst.i32 2 - - ; asm: movss (%rax,%rbx,1),%xmm5 - [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 - ; asm: movss 0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 - ; asm: movss -0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 0f 10 6c 18 ce - ; asm: movss 0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 - ; asm: movss -0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 0f 10 ac 18 ffffd8f0 - ; asm: movss %xmm5,(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 - ; asm: movss %xmm5,0x32(%rax,%rbx,1) - [-] 
store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 - ; asm: movss %xmm2,-0x32(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 0f 11 54 18 ce - ; asm: movss %xmm5,0x2710(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 - ; asm: movss %xmm2,-0x2710(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 0f 11 94 18 ffffd8f0 - - return -} - -function %F64() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%rcx] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - - ; asm: cvtsi2sd %ecx, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 0f 2a e9 - ; asm: cvtsi2sd %esi, %xmm2 - [-,%xmm2] v11 = fcvt_from_sint.f64 v1 ; bin: f2 0f 2a d6 - - ; asm: cvtsd2ss %xmm2, %xmm5 - [-,%xmm5] v12 = fdemote.f32 v11 ; bin: f2 0f 5a ea - ; asm: cvtsd2ss %xmm5, %xmm2 - [-,%xmm2] v13 = fdemote.f32 v10 ; bin: f2 0f 5a d5 - - ; No i64 <-> f64 bitcasts in 32-bit mode. - - ; asm: movaps %xmm2, %xmm5 - [-,%xmm5] v18 = copy v11 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - [-,%xmm2] v19 = copy v10 ; bin: 0f 28 d5 - - ; asm: movaps %xmm2, %xmm5 - regmove v19, %xmm2 -> %xmm5 ; bin: 0f 28 ea - ; asm: movaps %xmm5, %xmm2 - regmove v19, %xmm5 -> %xmm2 ; bin: 0f 28 d5 - - ; Binary arithmetic. 
- - ; asm: addsd %xmm2, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f2 0f 58 ea - ; asm: addsd %xmm5, %xmm2 - [-,%xmm2] v21 = fadd v11, v10 ; bin: f2 0f 58 d5 - - ; asm: subsd %xmm2, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f2 0f 5c ea - ; asm: subsd %xmm5, %xmm2 - [-,%xmm2] v23 = fsub v11, v10 ; bin: f2 0f 5c d5 - - ; asm: mulsd %xmm2, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f2 0f 59 ea - ; asm: mulsd %xmm5, %xmm2 - [-,%xmm2] v25 = fmul v11, v10 ; bin: f2 0f 59 d5 - - ; asm: divsd %xmm2, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 0f 5e ea - ; asm: divsd %xmm5, %xmm2 - [-,%xmm2] v27 = fdiv v11, v10 ; bin: f2 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. - - ; asm: andps %xmm2, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea - ; asm: andps %xmm5, %xmm2 - [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 - - ; asm: andnps %xmm2, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 0f 55 ea - ; asm: andnps %xmm5, %xmm2 - [-,%xmm2] v33 = band_not v10, v11 ; bin: 0f 55 d5 - - ; asm: orps %xmm2, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea - ; asm: orps %xmm5, %xmm2 - [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 - - ; asm: xorps %xmm2, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea - ; asm: xorps %xmm5, %xmm2 - [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 - - ; Convert float to int. (No i64 dest on i386). - - ; asm: cvttsd2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd - ; asm: cvttsd2si %xmm2, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 0f 2c f2 - - ; Min/max. - - ; asm: minsd %xmm2, %xmm5 - [-,%xmm5] v42 = x86_fmin v10, v11 ; bin: f2 0f 5d ea - ; asm: minsd %xmm5, %xmm2 - [-,%xmm2] v43 = x86_fmin v11, v10 ; bin: f2 0f 5d d5 - ; asm: maxsd %xmm2, %xmm5 - [-,%xmm5] v44 = x86_fmax v10, v11 ; bin: f2 0f 5f ea - ; asm: maxsd %xmm5, %xmm2 - [-,%xmm2] v45 = x86_fmax v11, v10 ; bin: f2 0f 5f d5 - - ; Unary arithmetic. 
- - ; asm: sqrtsd %xmm5, %xmm2 - [-,%xmm2] v50 = sqrt v10 ; bin: f2 0f 51 d5 - ; asm: sqrtsd %xmm2, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f2 0f 51 ea - - ; asm: roundsd $0, %xmm5, %xmm4 - [-,%xmm4] v52 = nearest v10 ; bin: 66 0f 3a 0b e5 00 - ; asm: roundsd $0, %xmm2, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 0f 3a 0b ea 00 - ; asm: roundsd $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00 - - ; asm: roundsd $1, %xmm5, %xmm4 - [-,%xmm4] v55 = floor v10 ; bin: 66 0f 3a 0b e5 01 - ; asm: roundsd $1, %xmm2, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 0f 3a 0b ea 01 - ; asm: roundsd $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01 - - ; asm: roundsd $2, %xmm5, %xmm4 - [-,%xmm4] v58 = ceil v10 ; bin: 66 0f 3a 0b e5 02 - ; asm: roundsd $2, %xmm2, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 0f 3a 0b ea 02 - ; asm: roundsd $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02 - - ; asm: roundsd $3, %xmm5, %xmm4 - [-,%xmm4] v61 = trunc v10 ; bin: 66 0f 3a 0b e5 03 - ; asm: roundsd $3, %xmm2, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 0f 3a 0b ea 03 - ; asm: roundsd $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03 - - ; Load/Store - - ; asm: movsd (%ecx), %xmm5 - [-,%xmm5] v100 = load.f64 v0 ; bin: heap_oob f2 0f 10 29 - ; asm: movsd (%esi), %xmm2 - [-,%xmm2] v101 = load.f64 v1 ; bin: heap_oob f2 0f 10 16 - ; asm: movsd 50(%ecx), %xmm5 - [-,%xmm5] v110 = load.f64 v0+50 ; bin: heap_oob f2 0f 10 69 32 - ; asm: movsd -50(%esi), %xmm2 - [-,%xmm2] v111 = load.f64 v1-50 ; bin: heap_oob f2 0f 10 56 ce - ; asm: movsd 10000(%ecx), %xmm5 - [-,%xmm5] v120 = load.f64 v0+10000 ; bin: heap_oob f2 0f 10 a9 00002710 - ; asm: movsd -10000(%esi), %xmm2 - [-,%xmm2] v121 = load.f64 v1-10000 ; bin: heap_oob f2 0f 10 96 ffffd8f0 - - ; asm: movsd %xmm5, (%ecx) - [-] store.f64 v100, v0 ; bin: heap_oob f2 0f 11 29 - ; asm: movsd %xmm2, (%esi) - [-] store.f64 v101, v1 ; bin: heap_oob f2 0f 11 16 - ; asm: movsd %xmm5, 
50(%ecx) - [-] store.f64 v100, v0+50 ; bin: heap_oob f2 0f 11 69 32 - ; asm: movsd %xmm2, -50(%esi) - [-] store.f64 v101, v1-50 ; bin: heap_oob f2 0f 11 56 ce - ; asm: movsd %xmm5, 10000(%ecx) - [-] store.f64 v100, v0+10000 ; bin: heap_oob f2 0f 11 a9 00002710 - ; asm: movsd %xmm2, -10000(%esi) - [-] store.f64 v101, v1-10000 ; bin: heap_oob f2 0f 11 96 ffffd8f0 - - ; Spill / Fill. - - ; asm: movsd %xmm5, 1032(%esp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd %xmm2, 1032(%esp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f2 0f 11 94 24 00000408 - - ; asm: movsd 1032(%esp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408 - ; asm: movsd 1032(%esp), %xmm2 - [-,%xmm2] v211 = fill v201 ; bin: f2 0f 10 94 24 00000408 - - ; asm: movsd %xmm5, 1032(%esp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd 1032(%esp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomisd %xmm2, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 0f 2e ea 0f 9b c3 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 0f 2e d5 0f 9a c3 - ; asm: ucomisd %xmm2, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 0f 2e ea 0f 95 c2 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 0f 2e d5 0f 94 c2 - ; asm: ucomisd %xmm2, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 0f 2e ea 0f 97 c3 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 0f 2e d5 0f 93 c3 - ; asm: ucomisd %xmm2, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 0f 2e ea 0f 92 c2 - ; asm: ucomisd %xmm5, %xmm2 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 0f 2e d5 0f 96 c2 - - ; asm: ucomisd %xmm2, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 66 0f 2e ea - ; asm: ucomisd %xmm2, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 66 0f 2e d5 - ; asm: ucomisd %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed - - return -} - -function %cpuflags_float(f32 [%xmm0]) { -block0(v0: f32 [%xmm0]): - ; asm: ucomiss %xmm0, %xmm0 - [-,%rflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0 - - jump block1 - -block1: - ; asm: jnp block1 - brff ord v1, block1 ; bin: 7b fe - jump block2 - -block2: - ; asm: jp block1 - brff uno v1, block1 ; bin: 7a fc - jump block3 - -block3: - ; asm: jne block1 - brff one v1, block1 ; bin: 75 fa - jump block4 - -block4: - ; asm: je block1 - brff ueq v1, block1 ; bin: 74 f8 - jump block5 - -block5: - ; asm: ja block1 - brff gt v1, block1 ; bin: 77 f6 - jump block6 - -block6: - ; asm: jae block1 - brff ge v1, block1 ; bin: 73 f4 - jump block7 - -block7: - ; asm: jb block1 - brff ult v1, block1 ; bin: 72 f2 - jump block8 - -block8: - ; asm: jbe block1 - brff ule v1, block1 ; bin: 76 f0 - jump block9 - -block9: - ; asm: jp .+4; ud2 
- trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b - ; asm: jnp .+4; ud2 - trapff uno v1, user0 ; bin: 7b 02 user0 0f 0b - ; asm: je .+4; ud2 - trapff one v1, user0 ; bin: 74 02 user0 0f 0b - ; asm: jne .+4; ud2 - trapff ueq v1, user0 ; bin: 75 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapff gt v1, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapff ge v1, user0 ; bin: 72 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapff ult v1, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapff ule v1, user0 ; bin: 77 02 user0 0f 0b - - ; asm: setnp %bl - [-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3 - ; asm: setp %bl - [-,%rbx] v11 = trueff uno v1 ; bin: 0f 9a c3 - ; asm: setne %dl - [-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2 - ; asm: sete %dl - [-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2 - ; asm: seta %al - [-,%rax] v14 = trueff gt v1 ; bin: 0f 97 c0 - ; asm: setae %al - [-,%rax] v15 = trueff ge v1 ; bin: 0f 93 c0 - ; asm: setb %cl - [-,%rcx] v16 = trueff ult v1 ; bin: 0f 92 c1 - ; asm: setbe %cl - [-,%rcx] v17 = trueff ule v1 ; bin: 0f 96 c1 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/binary32.clif b/cranelift/filetests/filetests/isa/x86/binary32.clif deleted file mode 100644 index 11268d5c4e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary32.clif +++ /dev/null @@ -1,721 +0,0 @@ -; binary emission of x86-32 code. 
-test binemit -set opt_level=speed_and_size -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32.clif | llvm-mc -show-encoding -triple=i386 -; - -function %I32() { - sig0 = () - fn0 = %foo() - - gv0 = symbol %some_gv - - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - ; asm: movl $1, %ecx - [-,%rcx] v1 = iconst.i32 1 ; bin: b9 00000001 - ; asm: movl $2, %esi - [-,%rsi] v2 = iconst.i32 2 ; bin: be 00000002 - - ; asm: movb $1, %cl - [-,%rcx] v9007 = bconst.b1 true ; bin: b9 00000001 - - ; Integer Register-Register Operations. - - ; asm: addl %esi, %ecx - [-,%rcx] v10 = iadd v1, v2 ; bin: 01 f1 - ; asm: addl %ecx, %esi - [-,%rsi] v11 = iadd v2, v1 ; bin: 01 ce - ; asm: subl %esi, %ecx - [-,%rcx] v12 = isub v1, v2 ; bin: 29 f1 - ; asm: subl %ecx, %esi - [-,%rsi] v13 = isub v2, v1 ; bin: 29 ce - - ; asm: andl %esi, %ecx - [-,%rcx] v14 = band v1, v2 ; bin: 21 f1 - ; asm: andl %ecx, %esi - [-,%rsi] v15 = band v2, v1 ; bin: 21 ce - ; asm: orl %esi, %ecx - [-,%rcx] v16 = bor v1, v2 ; bin: 09 f1 - ; asm: orl %ecx, %esi - [-,%rsi] v17 = bor v2, v1 ; bin: 09 ce - ; asm: xorl %esi, %ecx - [-,%rcx] v18 = bxor v1, v2 ; bin: 31 f1 - ; asm: xorl %ecx, %esi - [-,%rsi] v19 = bxor v2, v1 ; bin: 31 ce - - ; Dynamic shifts take the shift amount in %rcx. 
- - ; asm: shll %cl, %esi - [-,%rsi] v20 = ishl v2, v1 ; bin: d3 e6 - ; asm: shll %cl, %ecx - [-,%rcx] v21 = ishl v1, v1 ; bin: d3 e1 - ; asm: shrl %cl, %esi - [-,%rsi] v22 = ushr v2, v1 ; bin: d3 ee - ; asm: shrl %cl, %ecx - [-,%rcx] v23 = ushr v1, v1 ; bin: d3 e9 - ; asm: sarl %cl, %esi - [-,%rsi] v24 = sshr v2, v1 ; bin: d3 fe - ; asm: sarl %cl, %ecx - [-,%rcx] v25 = sshr v1, v1 ; bin: d3 f9 - ; asm: roll %cl, %esi - [-,%rsi] v26 = rotl v2, v1 ; bin: d3 c6 - ; asm: roll %cl, %ecx - [-,%rcx] v27 = rotl v1, v1 ; bin: d3 c1 - ; asm: rorl %cl, %esi - [-,%rsi] v28 = rotr v2, v1 ; bin: d3 ce - ; asm: rorl %cl, %ecx - [-,%rcx] v29 = rotr v1, v1 ; bin: d3 c9 - - ; Integer Register - Immediate 8-bit operations. - ; The 8-bit immediate is sign-extended. - - ; asm: addl $-128, %ecx - [-,%rcx] v30 = iadd_imm v1, -128 ; bin: 83 c1 80 - ; asm: addl $10, %esi - [-,%rsi] v31 = iadd_imm v2, 10 ; bin: 83 c6 0a - - ; asm: andl $-128, %ecx - [-,%rcx] v32 = band_imm v1, -128 ; bin: 83 e1 80 - ; asm: andl $10, %esi - [-,%rsi] v33 = band_imm v2, 10 ; bin: 83 e6 0a - ; asm: orl $-128, %ecx - [-,%rcx] v34 = bor_imm v1, -128 ; bin: 83 c9 80 - ; asm: orl $10, %esi - [-,%rsi] v35 = bor_imm v2, 10 ; bin: 83 ce 0a - ; asm: xorl $-128, %ecx - [-,%rcx] v36 = bxor_imm v1, -128 ; bin: 83 f1 80 - ; asm: xorl $10, %esi - [-,%rsi] v37 = bxor_imm v2, 10 ; bin: 83 f6 0a - - ; Integer Register - Immediate 32-bit operations. 
- - ; asm: addl $-128000, %ecx - [-,%rcx] v40 = iadd_imm v1, -128000 ; bin: 81 c1 fffe0c00 - ; asm: addl $1000000, %esi - [-,%rsi] v41 = iadd_imm v2, 1000000 ; bin: 81 c6 000f4240 - - ; asm: andl $-128000, %ecx - [-,%rcx] v42 = band_imm v1, -128000 ; bin: 81 e1 fffe0c00 - ; asm: andl $1000000, %esi - [-,%rsi] v43 = band_imm v2, 1000000 ; bin: 81 e6 000f4240 - ; asm: orl $-128000, %ecx - [-,%rcx] v44 = bor_imm v1, -128000 ; bin: 81 c9 fffe0c00 - ; asm: orl $1000000, %esi - [-,%rsi] v45 = bor_imm v2, 1000000 ; bin: 81 ce 000f4240 - ; asm: xorl $-128000, %ecx - [-,%rcx] v46 = bxor_imm v1, -128000 ; bin: 81 f1 fffe0c00 - ; asm: xorl $1000000, %esi - [-,%rsi] v47 = bxor_imm v2, 1000000 ; bin: 81 f6 000f4240 - - ; More arithmetic. - - ; asm: imull %esi, %ecx - [-,%rcx] v50 = imul v1, v2 ; bin: 0f af ce - ; asm: imull %ecx, %esi - [-,%rsi] v51 = imul v2, v1 ; bin: 0f af f1 - - ; asm: movl $1, %eax - [-,%rax] v52 = iconst.i32 1 ; bin: b8 00000001 - ; asm: movl $2, %edx - [-,%rdx] v53 = iconst.i32 2 ; bin: ba 00000002 - ; asm: idivl %ecx - [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1 ; bin: int_divz f7 f9 - ; asm: idivl %esi - [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2 ; bin: int_divz f7 fe - ; asm: divl %ecx - [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1 ; bin: int_divz f7 f1 - ; asm: divl %esi - [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2 ; bin: int_divz f7 f6 - - ; Register copies. - - ; asm: movl %esi, %ecx - [-,%rcx] v80 = copy v2 ; bin: 89 f1 - ; asm: movl %ecx, %esi - [-,%rsi] v81 = copy v1 ; bin: 89 ce - - ; Copy Special - ; asm: movl %esp, %ebp - copy_special %rsp -> %rbp ; bin: 89 e5 - ; asm: movl %ebp, %esp - copy_special %rbp -> %rsp ; bin: 89 ec - - - ; Load/Store instructions. - - ; Register indirect addressing with no displacement. 
- - ; asm: movl %ecx, (%esi) - store v1, v2 ; bin: heap_oob 89 0e - ; asm: movl %esi, (%ecx) - store v2, v1 ; bin: heap_oob 89 31 - ; asm: movw %cx, (%esi) - istore16 v1, v2 ; bin: heap_oob 66 89 0e - ; asm: movw %si, (%ecx) - istore16 v2, v1 ; bin: heap_oob 66 89 31 - ; asm: movb %cl, (%esi) - istore8 v1, v2 ; bin: heap_oob 88 0e - ; Can't store %sil in 32-bit mode (needs REX prefix). - - ; asm: movl (%ecx), %edi - [-,%rdi] v100 = load.i32 v1 ; bin: heap_oob 8b 39 - ; asm: movl (%esi), %edx - [-,%rdx] v101 = load.i32 v2 ; bin: heap_oob 8b 16 - ; asm: movzwl (%ecx), %edi - [-,%rdi] v102 = uload16.i32 v1 ; bin: heap_oob 0f b7 39 - ; asm: movzwl (%esi), %edx - [-,%rdx] v103 = uload16.i32 v2 ; bin: heap_oob 0f b7 16 - ; asm: movswl (%ecx), %edi - [-,%rdi] v104 = sload16.i32 v1 ; bin: heap_oob 0f bf 39 - ; asm: movswl (%esi), %edx - [-,%rdx] v105 = sload16.i32 v2 ; bin: heap_oob 0f bf 16 - ; asm: movzbl (%ecx), %edi - [-,%rdi] v106 = uload8.i32 v1 ; bin: heap_oob 0f b6 39 - ; asm: movzbl (%esi), %edx - [-,%rdx] v107 = uload8.i32 v2 ; bin: heap_oob 0f b6 16 - ; asm: movsbl (%ecx), %edi - [-,%rdi] v108 = sload8.i32 v1 ; bin: heap_oob 0f be 39 - ; asm: movsbl (%esi), %edx - [-,%rdx] v109 = sload8.i32 v2 ; bin: heap_oob 0f be 16 - - ; Register-indirect with 8-bit signed displacement. 
- - ; asm: movl %ecx, 100(%esi) - store v1, v2+100 ; bin: heap_oob 89 4e 64 - ; asm: movl %esi, -100(%ecx) - store v2, v1-100 ; bin: heap_oob 89 71 9c - ; asm: movw %cx, 100(%esi) - istore16 v1, v2+100 ; bin: heap_oob 66 89 4e 64 - ; asm: movw %si, -100(%ecx) - istore16 v2, v1-100 ; bin: heap_oob 66 89 71 9c - ; asm: movb %cl, 100(%esi) - istore8 v1, v2+100 ; bin: heap_oob 88 4e 64 - - ; asm: movl 50(%ecx), %edi - [-,%rdi] v110 = load.i32 v1+50 ; bin: heap_oob 8b 79 32 - ; asm: movl -50(%esi), %edx - [-,%rdx] v111 = load.i32 v2-50 ; bin: heap_oob 8b 56 ce - ; asm: movzwl 50(%ecx), %edi - [-,%rdi] v112 = uload16.i32 v1+50 ; bin: heap_oob 0f b7 79 32 - ; asm: movzwl -50(%esi), %edx - [-,%rdx] v113 = uload16.i32 v2-50 ; bin: heap_oob 0f b7 56 ce - ; asm: movswl 50(%ecx), %edi - [-,%rdi] v114 = sload16.i32 v1+50 ; bin: heap_oob 0f bf 79 32 - ; asm: movswl -50(%esi), %edx - [-,%rdx] v115 = sload16.i32 v2-50 ; bin: heap_oob 0f bf 56 ce - ; asm: movzbl 50(%ecx), %edi - [-,%rdi] v116 = uload8.i32 v1+50 ; bin: heap_oob 0f b6 79 32 - ; asm: movzbl -50(%esi), %edx - [-,%rdx] v117 = uload8.i32 v2-50 ; bin: heap_oob 0f b6 56 ce - ; asm: movsbl 50(%ecx), %edi - [-,%rdi] v118 = sload8.i32 v1+50 ; bin: heap_oob 0f be 79 32 - ; asm: movsbl -50(%esi), %edx - [-,%rdx] v119 = sload8.i32 v2-50 ; bin: heap_oob 0f be 56 ce - - ; Register-indirect with 32-bit signed displacement. 
- - ; asm: movl %ecx, 10000(%esi) - store v1, v2+10000 ; bin: heap_oob 89 8e 00002710 - ; asm: movl %esi, -10000(%ecx) - store v2, v1-10000 ; bin: heap_oob 89 b1 ffffd8f0 - ; asm: movw %cx, 10000(%esi) - istore16 v1, v2+10000 ; bin: heap_oob 66 89 8e 00002710 - ; asm: movw %si, -10000(%ecx) - istore16 v2, v1-10000 ; bin: heap_oob 66 89 b1 ffffd8f0 - ; asm: movb %cl, 10000(%esi) - istore8 v1, v2+10000 ; bin: heap_oob 88 8e 00002710 - - ; asm: movl 50000(%ecx), %edi - [-,%rdi] v120 = load.i32 v1+50000 ; bin: heap_oob 8b b9 0000c350 - ; asm: movl -50000(%esi), %edx - [-,%rdx] v121 = load.i32 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 - ; asm: movzwl 50000(%ecx), %edi - [-,%rdi] v122 = uload16.i32 v1+50000 ; bin: heap_oob 0f b7 b9 0000c350 - ; asm: movzwl -50000(%esi), %edx - [-,%rdx] v123 = uload16.i32 v2-50000 ; bin: heap_oob 0f b7 96 ffff3cb0 - ; asm: movswl 50000(%ecx), %edi - [-,%rdi] v124 = sload16.i32 v1+50000 ; bin: heap_oob 0f bf b9 0000c350 - ; asm: movswl -50000(%esi), %edx - [-,%rdx] v125 = sload16.i32 v2-50000 ; bin: heap_oob 0f bf 96 ffff3cb0 - ; asm: movzbl 50000(%ecx), %edi - [-,%rdi] v126 = uload8.i32 v1+50000 ; bin: heap_oob 0f b6 b9 0000c350 - ; asm: movzbl -50000(%esi), %edx - [-,%rdx] v127 = uload8.i32 v2-50000 ; bin: heap_oob 0f b6 96 ffff3cb0 - ; asm: movsbl 50000(%ecx), %edi - [-,%rdi] v128 = sload8.i32 v1+50000 ; bin: heap_oob 0f be b9 0000c350 - ; asm: movsbl -50000(%esi), %edx - [-,%rdx] v129 = sload8.i32 v2-50000 ; bin: heap_oob 0f be 96 ffff3cb0 - - ; Bit-counting instructions. - - ; asm: popcntl %esi, %ecx - [-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce - ; asm: popcntl %ecx, %esi - [-,%rsi] v201 = popcnt v1 ; bin: f3 0f b8 f1 - - ; asm: lzcntl %esi, %ecx - [-,%rcx] v202 = clz v2 ; bin: f3 0f bd ce - ; asm: lzcntl %ecx, %esi - [-,%rsi] v203 = clz v1 ; bin: f3 0f bd f1 - - ; asm: tzcntl %esi, %ecx - [-,%rcx] v204 = ctz v2 ; bin: f3 0f bc ce - ; asm: tzcntl %ecx, %esi - [-,%rsi] v205 = ctz v1 ; bin: f3 0f bc f1 - - ; Integer comparisons. 
- - ; asm: cmpl %esi, %ecx - ; asm: sete %bl - [-,%rbx] v300 = icmp eq v1, v2 ; bin: 39 f1 0f 94 c3 - ; asm: cmpl %ecx, %esi - ; asm: sete %dl - [-,%rdx] v301 = icmp eq v2, v1 ; bin: 39 ce 0f 94 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setne %bl - [-,%rbx] v302 = icmp ne v1, v2 ; bin: 39 f1 0f 95 c3 - ; asm: cmpl %ecx, %esi - ; asm: setne %dl - [-,%rdx] v303 = icmp ne v2, v1 ; bin: 39 ce 0f 95 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setl %bl - [-,%rbx] v304 = icmp slt v1, v2 ; bin: 39 f1 0f 9c c3 - ; asm: cmpl %ecx, %esi - ; asm: setl %dl - [-,%rdx] v305 = icmp slt v2, v1 ; bin: 39 ce 0f 9c c2 - - ; asm: cmpl %esi, %ecx - ; asm: setge %bl - [-,%rbx] v306 = icmp sge v1, v2 ; bin: 39 f1 0f 9d c3 - ; asm: cmpl %ecx, %esi - ; asm: setge %dl - [-,%rdx] v307 = icmp sge v2, v1 ; bin: 39 ce 0f 9d c2 - - ; asm: cmpl %esi, %ecx - ; asm: setg %bl - [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 39 f1 0f 9f c3 - ; asm: cmpl %ecx, %esi - ; asm: setg %dl - [-,%rdx] v309 = icmp sgt v2, v1 ; bin: 39 ce 0f 9f c2 - - ; asm: cmpl %esi, %ecx - ; asm: setle %bl - [-,%rbx] v310 = icmp sle v1, v2 ; bin: 39 f1 0f 9e c3 - ; asm: cmpl %ecx, %esi - ; asm: setle %dl - [-,%rdx] v311 = icmp sle v2, v1 ; bin: 39 ce 0f 9e c2 - - ; asm: cmpl %esi, %ecx - ; asm: setb %bl - [-,%rbx] v312 = icmp ult v1, v2 ; bin: 39 f1 0f 92 c3 - ; asm: cmpl %ecx, %esi - ; asm: setb %dl - [-,%rdx] v313 = icmp ult v2, v1 ; bin: 39 ce 0f 92 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setae %bl - [-,%rbx] v314 = icmp uge v1, v2 ; bin: 39 f1 0f 93 c3 - ; asm: cmpl %ecx, %esi - ; asm: setae %dl - [-,%rdx] v315 = icmp uge v2, v1 ; bin: 39 ce 0f 93 c2 - - ; asm: cmpl %esi, %ecx - ; asm: seta %bl - [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 39 f1 0f 97 c3 - ; asm: cmpl %ecx, %esi - ; asm: seta %dl - [-,%rdx] v317 = icmp ugt v2, v1 ; bin: 39 ce 0f 97 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setbe %bl - [-,%rbx] v318 = icmp ule v1, v2 ; bin: 39 f1 0f 96 c3 - ; asm: cmpl %ecx, %esi - ; asm: setbe %dl - [-,%rdx] v319 = icmp ule v2, v1 ; bin: 39 ce 0f 
96 c2 - - ; Bool-to-int conversions. - - ; asm: movzbl %bl, %ecx - [-,%rcx] v350 = bint.i32 v300 ; bin: 0f b6 cb - ; asm: movzbl %dl, %esi - [-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2 - - ; asm: call foo - call fn0() ; bin: stk_ovf e8 CallPCRel4(%foo-4) 00000000 - - ; asm: movl $0, %ecx - [-,%rcx] v400 = func_addr.i32 fn0 ; bin: b9 Abs4(%foo) 00000000 - ; asm: movl $0, %esi - [-,%rsi] v401 = func_addr.i32 fn0 ; bin: be Abs4(%foo) 00000000 - - ; asm: call *%ecx - call_indirect sig0, v400() ; bin: stk_ovf ff d1 - ; asm: call *%esi - call_indirect sig0, v401() ; bin: stk_ovf ff d6 - - ; asm: movl $0, %ecx - [-,%rcx] v450 = symbol_value.i32 gv0 ; bin: b9 Abs4(%some_gv) 00000000 - ; asm: movl $0, %esi - [-,%rsi] v451 = symbol_value.i32 gv0 ; bin: be Abs4(%some_gv) 00000000 - - ; Spill / Fill. - - ; asm: movl %ecx, 1032(%esp) - [-,ss1] v500 = spill v1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl %esi, 1032(%esp) - [-,ss1] v501 = spill v2 ; bin: stk_ovf 89 b4 24 00000408 - - ; asm: movl 1032(%esp), %ecx - [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408 - ; asm: movl 1032(%esp), %esi - [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 - - ; asm: movl %ecx, 1032(%esp) - regspill v1, %rcx -> ss1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl 1032(%esp), %ecx - regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408 - - ; Push and Pop - ; asm: pushl %ecx - x86_push v1 ; bin: stk_ovf 51 - ; asm: popl %ecx - [-,%rcx] v512 = x86_pop.i32 ; bin: 59 - - ; Adjust Stack Pointer Up - ; asm: addl $64, %esp - adjust_sp_up_imm 64 ; bin: 83 c4 40 - ; asm: addl $-64, %esp - adjust_sp_up_imm -64 ; bin: 83 c4 c0 - ; asm: addl $1024, %esp - adjust_sp_up_imm 1024 ; bin: 81 c4 00000400 - ; asm: addl $-1024, %esp - adjust_sp_up_imm -1024 ; bin: 81 c4 fffffc00 - ; asm: addl $2147483647, %esp - adjust_sp_up_imm 2147483647 ; bin: 81 c4 7fffffff - ; asm: addl $-2147483648, %esp - adjust_sp_up_imm -2147483648 ; bin: 81 c4 80000000 - - ; Adjust Stack Pointer Down - ; asm: subl %ecx, %esp - 
adjust_sp_down v1 ; bin: 29 cc - ; asm: subl %esi, %esp - adjust_sp_down v2 ; bin: 29 f4 - ; asm: addl $64, %esp - adjust_sp_down_imm 64 ; bin: 83 ec 40 - ; asm: addl $-64, %esp - adjust_sp_down_imm -64 ; bin: 83 ec c0 - ; asm: addl $1024, %esp - adjust_sp_down_imm 1024 ; bin: 81 ec 00000400 - ; asm: addl $-1024, %esp - adjust_sp_down_imm -1024 ; bin: 81 ec fffffc00 - ; asm: addl $2147483647, %esp - adjust_sp_down_imm 2147483647 ; bin: 81 ec 7fffffff - ; asm: addl $-2147483648, %esp - adjust_sp_down_imm -2147483648 ; bin: 81 ec 80000000 - - ; Shift immediates - ; asm: shll $2, %esi - [-,%rsi] v513 = ishl_imm v2, 2 ; bin: c1 e6 02 - ; asm: sarl $5, %esi - [-,%rsi] v514 = sshr_imm v2, 5 ; bin: c1 fe 05 - ; asm: shrl $8, %esi - [-,%rsi] v515 = ushr_imm v2, 8 ; bin: c1 ee 08 - - ; Rotate immediates - ; asm: rolq $12, %esi - [-,%rsi] v5101 = rotl_imm v2, 12 ; bin: c1 c6 0c - ; asm: rorq $5, %esi - [-,%rsi] v5103 = rotr_imm v2, 5 ; bin: c1 ce 05 - - ; Load Complex - [-,%rax] v521 = iconst.i32 1 - [-,%rbx] v522 = iconst.i32 1 - ; asm: movl (%eax,%ebx,1), %ecx - [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18 - ; asm: movl 1(%eax,%ebx,1), %ecx - [-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01 - ; asm: mov 0x100000(%eax,%ebx,1),%ecx - [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000 - ; asm: movzbl (%eax,%ebx,1),%ecx - [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18 - ; asm: movsbl (%eax,%ebx,1),%ecx - [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18 - ; asm: movzwl (%eax,%ebx,1),%ecx - [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18 - ; asm: movswl (%eax,%ebx,1),%ecx - [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18 - - ; Store Complex - [-,%rcx] v601 = iconst.i32 1 - ; asm: mov %ecx,(%eax,%ebx,1) - store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18 - ; asm: mov %ecx,0x1(%eax,%ebx,1) - 
store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01 - ; asm: mov %ecx,0x100000(%eax,%ebx,1) - store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000 - ; asm: mov %cx,(%eax,%ebx,1) - istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18 - ; asm: mov %cl,(%eax,%ebx,1) - istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 - - ; Carry Addition - ; asm: addl %esi, %ecx - [-,%rcx,%rflags] v701, v702 = iadd_ifcout v1, v2 ; bin: 01 f1 - ; asm: adcl %esi, %ecx - [-,%rcx] v703 = iadd_ifcin v1, v2, v702 ; bin: 11 f1 - ; asm: adcl %esi, %ecx - [-,%rcx,%rflags] v704, v705 = iadd_ifcarry v1, v2, v702 ; bin: 11 f1 - - ; Borrow Subtraction - ; asm: subl %esi, %ecx - [-,%rcx,%rflags] v706, v707 = isub_ifbout v1, v2 ; bin: 29 f1 - ; asm: sbbl %esi, %ecx - [-,%rcx] v708 = isub_ifbin v1, v2, v707 ; bin: 19 f1 - ; asm: sbbl %esi, %ecx - [-,%rcx,%rflags] v709, v710 = isub_ifborrow v1, v2, v707 ; bin: 19 f1 - - ; asm: testl %ecx, %ecx - ; asm: je block1 - brz v1, block1 ; bin: 85 c9 74 0e - fallthrough block3 - -block3: - ; asm: testl %esi, %esi - ; asm: je block1 - brz v2, block1 ; bin: 85 f6 74 0a - fallthrough block4 - -block4: - ; asm: testl %ecx, %ecx - ; asm: jne block1 - brnz v1, block1 ; bin: 85 c9 75 06 - fallthrough block5 - -block5: - ; asm: testl %esi, %esi - ; asm: jne block1 - brnz v2, block1 ; bin: 85 f6 75 02 - - ; asm: jmp block2 - jump block2 ; bin: eb 01 - - ; asm: block1: -block1: - ; asm: ret - return ; bin: c3 - - ; asm: block2: -block2: - trap user0 ; bin: user0 0f 0b -} - -; Special branch encodings only for I32 mode. 
-function %special_branches() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - [-,%rdi] v3 = icmp eq v1, v2 - [-,%rbx] v4 = icmp ugt v1, v2 - - ; asm: testl $0xff, %edi - ; asm: je block1 - brz v3, block1 ; bin: f7 c7 000000ff 0f 84 00000015 - fallthrough block2 - -block2: - ; asm: testb %bl, %bl - ; asm: je block1 - brz v4, block1 ; bin: 84 db 74 11 - fallthrough block3 - -block3: - ; asm: testl $0xff, %edi - ; asm: jne block1 - brnz v3, block1 ; bin: f7 c7 000000ff 0f 85 00000005 - fallthrough block4 - -block4: - ; asm: testb %bl, %bl - ; asm: jne block1 - brnz v4, block1 ; bin: 84 db 75 01 - fallthrough block5 - -block5: - return - -block1: - return -} - -; CPU flag instructions. -function %cpu_flags() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - jump block1 - -block1: - ; asm: cmpl %esi, %ecx - [-,%rflags] v10 = ifcmp v1, v2 ; bin: 39 f1 - ; asm: cmpl %ecx, %esi - [-,%rflags] v11 = ifcmp v2, v1 ; bin: 39 ce - - ; asm: je block1 - brif eq v11, block1 ; bin: 74 fa - jump block2 - -block2: - ; asm: jne block1 - brif ne v11, block1 ; bin: 75 f8 - jump block3 - -block3: - ; asm: jl block1 - brif slt v11, block1 ; bin: 7c f6 - jump block4 - -block4: - ; asm: jge block1 - brif sge v11, block1 ; bin: 7d f4 - jump block5 - -block5: - ; asm: jg block1 - brif sgt v11, block1 ; bin: 7f f2 - jump block6 - -block6: - ; asm: jle block1 - brif sle v11, block1 ; bin: 7e f0 - jump block7 - -block7: - ; asm: jb block1 - brif ult v11, block1 ; bin: 72 ee - jump block8 - -block8: - ; asm: jae block1 - brif uge v11, block1 ; bin: 73 ec - jump block9 - -block9: - ; asm: ja block1 - brif ugt v11, block1 ; bin: 77 ea - jump block10 - -block10: - ; asm: jbe block1 - brif ule v11, block1 ; bin: 76 e8 - jump block11 - -block11: - - ; asm: sete %bl - [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 - ; asm: setne %bl - [-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3 - ; asm: setl %dl - [-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2 - ; asm: setge %dl 
- [-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2 - ; asm: setg %bl - [-,%rbx] v24 = trueif sgt v11 ; bin: 0f 9f c3 - ; asm: setle %bl - [-,%rbx] v25 = trueif sle v11 ; bin: 0f 9e c3 - ; asm: setb %dl - [-,%rdx] v26 = trueif ult v11 ; bin: 0f 92 c2 - ; asm: setae %dl - [-,%rdx] v27 = trueif uge v11 ; bin: 0f 93 c2 - ; asm: seta %bl - [-,%rbx] v28 = trueif ugt v11 ; bin: 0f 97 c3 - ; asm: setbe %bl - [-,%rbx] v29 = trueif ule v11 ; bin: 0f 96 c3 - - ; The trapif instructions are encoded as macros: a conditional jump over a ud2. - ; asm: jne .+4; ud2 - trapif eq v11, user0 ; bin: 75 02 user0 0f 0b - ; asm: je .+4; ud2 - trapif ne v11, user0 ; bin: 74 02 user0 0f 0b - ; asm: jnl .+4; ud2 - trapif slt v11, user0 ; bin: 7d 02 user0 0f 0b - ; asm: jnge .+4; ud2 - trapif sge v11, user0 ; bin: 7c 02 user0 0f 0b - ; asm: jng .+4; ud2 - trapif sgt v11, user0 ; bin: 7e 02 user0 0f 0b - ; asm: jnle .+4; ud2 - trapif sle v11, user0 ; bin: 7f 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapif ult v11, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapif uge v11, user0 ; bin: 72 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapif ugt v11, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapif ule v11, user0 ; bin: 77 02 user0 0f 0b - ; asm: jo .+4; ud2 - trapif of v11, user0 ; bin: 71 02 user0 0f 0b - ; asm: jno .+4; ud2 - trapif nof v11, user0 ; bin: 70 02 user0 0f 0b - - ; Stack check. - ; asm: cmpl %esp, %ecx - [-,%rflags] v40 = ifcmp_sp v1 ; bin: 39 e1 - ; asm: cmpl %esp, %esi - [-,%rflags] v41 = ifcmp_sp v2 ; bin: 39 e6 - - ; asm: cmpl $-100, %ecx - [-,%rflags] v42 = ifcmp_imm v1, -100 ; bin: 83 f9 9c - ; asm: cmpl $100, %esi - [-,%rflags] v43 = ifcmp_imm v2, 100 ; bin: 83 fe 64 - - ; asm: cmpl $-10000, %ecx - [-,%rflags] v44 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0 - ; asm: cmpl $10000, %esi - [-,%rflags] v45 = ifcmp_imm v2, 10000 ; bin: 81 fe 00002710 - - return -} - -; Tests for i32/i8 conversion instructions. 
-function %I32_I8() { -block0: - [-,%rcx] v1 = iconst.i32 1 - - [-,%rcx] v11 = ireduce.i8 v1 ; bin: - - ; asm: movsbl %cl, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 - - ; asm: movzbl %cl, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i32/i16 conversion instructions. -function %I32_I16() { -block0: - [-,%rcx] v1 = iconst.i32 1 - - [-,%rcx] v11 = ireduce.i16 v1 ; bin: - - ; asm: movswl %cx, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 - - ; asm: movzwl %cx, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 - - trap user0 ; bin: user0 0f 0b -} diff --git a/cranelift/filetests/filetests/isa/x86/binary64-float.clif b/cranelift/filetests/filetests/isa/x86/binary64-float.clif deleted file mode 100644 index 6bf6f325b1..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary64-float.clif +++ /dev/null @@ -1,638 +0,0 @@ -; Binary emission of 64-bit floating point code. -test binemit -set opt_level=speed_and_size -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-float.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %F32() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%r11] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - [-,%rax] v2 = iconst.i64 11 - [-,%r14] v3 = iconst.i64 12 - [-,%r13] v4 = iconst.i64 13 - - ; asm: cvtsi2ssl %r11d, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f32 v0 ; bin: f3 41 0f 2a eb - ; asm: cvtsi2ssl %esi, %xmm10 - [-,%xmm10] v11 = fcvt_from_sint.f32 v1 ; bin: f3 44 0f 2a d6 - - ; asm: cvtsi2ssq %rax, %xmm5 - [-,%xmm5] v12 = fcvt_from_sint.f32 v2 ; bin: f3 48 0f 2a e8 - ; asm: cvtsi2ssq %r14, %xmm10 - [-,%xmm10] v13 = fcvt_from_sint.f32 v3 ; bin: f3 4d 0f 2a d6 - - ; asm: cvtss2sd %xmm10, %xmm5 - [-,%xmm5] v14 = fpromote.f64 v11 ; bin: 
f3 41 0f 5a ea - ; asm: cvtss2sd %xmm5, %xmm10 - [-,%xmm10] v15 = fpromote.f64 v10 ; bin: f3 44 0f 5a d5 - - ; asm: movd %r11d, %xmm5 - [-,%xmm5] v16 = bitcast.f32 v0 ; bin: 66 41 0f 6e eb - ; asm: movd %esi, %xmm10 - [-,%xmm10] v17 = bitcast.f32 v1 ; bin: 66 44 0f 6e d6 - - ; asm: movd %xmm5, %ecx - [-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 0f 7e e9 - ; asm: movd %xmm10, %esi - [-,%rsi] v19 = bitcast.i32 v11 ; bin: 66 44 0f 7e d6 - - ; Binary arithmetic. - - ; asm: addss %xmm10, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f3 41 0f 58 ea - ; asm: addss %xmm5, %xmm10 - [-,%xmm10] v21 = fadd v11, v10 ; bin: f3 44 0f 58 d5 - - ; asm: subss %xmm10, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f3 41 0f 5c ea - ; asm: subss %xmm5, %xmm10 - [-,%xmm10] v23 = fsub v11, v10 ; bin: f3 44 0f 5c d5 - - ; asm: mulss %xmm10, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f3 41 0f 59 ea - ; asm: mulss %xmm5, %xmm10 - [-,%xmm10] v25 = fmul v11, v10 ; bin: f3 44 0f 59 d5 - - ; asm: divss %xmm10, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f3 41 0f 5e ea - ; asm: divss %xmm5, %xmm10 - [-,%xmm10] v27 = fdiv v11, v10 ; bin: f3 44 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. 
- - ; asm: andps %xmm10, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea - ; asm: andps %xmm5, %xmm10 - [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 - - ; asm: andnps %xmm10, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea - ; asm: andnps %xmm5, %xmm10 - [-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5 - - ; asm: orps %xmm10, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea - ; asm: orps %xmm5, %xmm10 - [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 - - ; asm: xorps %xmm10, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea - ; asm: xorps %xmm5, %xmm10 - [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 - - ; asm: movaps %xmm10, %xmm5 - [-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea - ; asm: movaps %xmm5, %xmm10 - [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 - - ; Copy to SSA - - ; asm: movsd %xmm0, %xmm15 - [-,%xmm15] v400 = copy_to_ssa.f64 %xmm0 ; bin: f2 44 0f 10 f8 - ; asm: movsd %xmm15, %xmm0 - [-,%xmm0] v401 = copy_to_ssa.f64 %xmm15 ; bin: f2 41 0f 10 c7 - ; asm: movsd %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. - [-,%xmm6] v402 = copy_to_ssa.f64 %xmm7 ; bin: f2 40 0f 10 f7 - ; asm: movsd %xmm11, %xmm14 - [-,%xmm14] v403 = copy_to_ssa.f64 %xmm11 ; bin: f2 45 0f 10 f3 - - ; asm: movss %xmm0, %xmm15 - [-,%xmm15] v404 = copy_to_ssa.f32 %xmm0 ; bin: f3 44 0f 10 f8 - ; asm: movss %xmm15, %xmm0 - [-,%xmm0] v405 = copy_to_ssa.f32 %xmm15 ; bin: f3 41 0f 10 c7 - ; asm: movss %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. - [-,%xmm6] v406 = copy_to_ssa.f32 %xmm7 ; bin: f3 40 0f 10 f7 - ; asm: movss %xmm11, %xmm14 - [-,%xmm14] v407 = copy_to_ssa.f32 %xmm11 ; bin: f3 45 0f 10 f3 - - ; Convert float to int. 
- - ; asm: cvttss2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f3 0f 2c cd - ; asm: cvttss2si %xmm10, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f3 41 0f 2c f2 - - ; asm: cvttss2si %xmm5, %rcx - [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f3 48 0f 2c cd - ; asm: cvttss2si %xmm10, %rsi - [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f3 49 0f 2c f2 - - ; Min/max. - - ; asm: minss %xmm10, %xmm5 - [-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f3 41 0f 5d ea - ; asm: minss %xmm5, %xmm10 - [-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f3 44 0f 5d d5 - ; asm: maxss %xmm10, %xmm5 - [-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f3 41 0f 5f ea - ; asm: maxss %xmm5, %xmm10 - [-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f3 44 0f 5f d5 - - ; Unary arithmetic. - - ; asm: sqrtss %xmm5, %xmm10 - [-,%xmm10] v50 = sqrt v10 ; bin: f3 44 0f 51 d5 - ; asm: sqrtss %xmm10, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f3 41 0f 51 ea - - ; asm: roundss $0, %xmm5, %xmm10 - [-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0a d5 00 - ; asm: roundss $0, %xmm10, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0a ea 00 - ; asm: roundss $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0a d5 00 - - ; asm: roundss $1, %xmm5, %xmm10 - [-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0a d5 01 - ; asm: roundss $1, %xmm10, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0a ea 01 - ; asm: roundss $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0a d5 01 - - ; asm: roundss $2, %xmm5, %xmm10 - [-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0a d5 02 - ; asm: roundss $2, %xmm10, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0a ea 02 - ; asm: roundss $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0a d5 02 - - ; asm: roundss $3, %xmm5, %xmm10 - [-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0a d5 03 - ; asm: roundss $3, %xmm10, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0a ea 03 - ; asm: roundss $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0a 
d5 03 - - ; Load/Store - - ; asm: movss (%r14), %xmm5 - [-,%xmm5] v100 = load.f32 v3 ; bin: heap_oob f3 41 0f 10 2e - ; asm: movss (%rax), %xmm10 - [-,%xmm10] v101 = load.f32 v2 ; bin: heap_oob f3 44 0f 10 10 - ; asm: movss 50(%r14), %xmm5 - [-,%xmm5] v110 = load.f32 v3+50 ; bin: heap_oob f3 41 0f 10 6e 32 - ; asm: movss -50(%rax), %xmm10 - [-,%xmm10] v111 = load.f32 v2-50 ; bin: heap_oob f3 44 0f 10 50 ce - ; asm: movss 10000(%r14), %xmm5 - [-,%xmm5] v120 = load.f32 v3+10000 ; bin: heap_oob f3 41 0f 10 ae 00002710 - ; asm: movss -10000(%rax), %xmm10 - [-,%xmm10] v121 = load.f32 v2-10000 ; bin: heap_oob f3 44 0f 10 90 ffffd8f0 - - ; asm: movss %xmm5, (%r14) - [-] store.f32 v100, v3 ; bin: heap_oob f3 41 0f 11 2e - ; asm: movss %xmm10, (%rax) - [-] store.f32 v101, v2 ; bin: heap_oob f3 44 0f 11 10 - ; asm: movss %xmm5, (%r13) - [-] store.f32 v100, v4 ; bin: heap_oob f3 41 0f 11 6d 00 - ; asm: movss %xmm10, (%r13) - [-] store.f32 v101, v4 ; bin: heap_oob f3 45 0f 11 55 00 - ; asm: movss %xmm5, 50(%r14) - [-] store.f32 v100, v3+50 ; bin: heap_oob f3 41 0f 11 6e 32 - ; asm: movss %xmm10, -50(%rax) - [-] store.f32 v101, v2-50 ; bin: heap_oob f3 44 0f 11 50 ce - ; asm: movss %xmm5, 10000(%r14) - [-] store.f32 v100, v3+10000 ; bin: heap_oob f3 41 0f 11 ae 00002710 - ; asm: movss %xmm10, -10000(%rax) - [-] store.f32 v101, v2-10000 ; bin: heap_oob f3 44 0f 11 90 ffffd8f0 - - ; Spill / Fill. 
- - ; asm: movss %xmm5, 1032(%rsp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss %xmm10, 1032(%rsp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f3 44 0f 11 94 24 00000408 - - ; asm: movss 1032(%rsp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f3 0f 10 ac 24 00000408 - ; asm: movss 1032(%rsp), %xmm10 - [-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 10 94 24 00000408 - - ; asm: movss %xmm5, 1032(%rsp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f3 0f 11 ac 24 00000408 - ; asm: movss 1032(%rsp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f3 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. - - ; asm: ucomiss %xmm10, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 41 0f 2e ea 0f 9b c3 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 44 0f 2e d5 0f 9a c3 - ; asm: ucomiss %xmm10, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 41 0f 2e ea 0f 95 c2 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 44 0f 2e d5 0f 94 c2 - ; asm: ucomiss %xmm10, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 41 0f 2e ea 0f 97 c3 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 44 0f 2e d5 0f 93 c3 - ; asm: ucomiss %xmm10, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 41 0f 2e ea 0f 92 c2 - ; asm: ucomiss %xmm5, %xmm10 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 44 0f 2e d5 0f 96 c2 - - ; asm: ucomiss %xmm10, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 41 0f 2e ea - ; asm: ucomiss %xmm10, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 44 0f 2e d5 - ; asm: ucomiss %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 0f 2e ed - - - ; Load/Store Complex - - [-,%rax] v350 = iconst.i64 1 - [-,%rbx] v351 = iconst.i64 2 - - ; asm: movss 
(%rax,%rbx,1),%xmm5 - [-,%xmm5] v352 = load_complex.f32 v350+v351 ; bin: heap_oob f3 0f 10 2c 18 - ; asm: movss 0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v353 = load_complex.f32 v350+v351+50 ; bin: heap_oob f3 0f 10 6c 18 32 - ; asm: movss -0x32(%rax,%rbx,1),%xmm10 - [-,%xmm10] v354 = load_complex.f32 v350+v351-50 ; bin: heap_oob f3 44 0f 10 54 18 ce - ; asm: 0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v355 = load_complex.f32 v350+v351+10000 ; bin: heap_oob f3 0f 10 ac 18 00002710 - ; asm: -0x2710(%rax,%rbx,1),%xmm10 - [-,%xmm10] v356 = load_complex.f32 v350+v351-10000 ; bin: heap_oob f3 44 0f 10 94 18 ffffd8f0 - - ; asm: movsd %xmm5, (%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351 ; bin: heap_oob f3 0f 11 2c 18 - ; asm: movsd %xmm5, 50(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+50 ; bin: heap_oob f3 0f 11 6c 18 32 - ; asm: movsd %xmm10, -50(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-50 ; bin: heap_oob f3 44 0f 11 54 18 ce - ; asm: movsd %xmm5, 10000(%rax,%rbx,1) - [-] store_complex.f32 v100, v350+v351+10000 ; bin: heap_oob f3 0f 11 ac 18 00002710 - ; asm: movsd %xmm10, -10000(%rax,%rbx,1) - [-] store_complex.f32 v101, v350+v351-10000 ; bin: heap_oob f3 44 0f 11 94 18 ffffd8f0 - - return -} - -function %F64() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - [-,%r11] v0 = iconst.i32 1 - [-,%rsi] v1 = iconst.i32 2 - [-,%rax] v2 = iconst.i64 11 - [-,%r14] v3 = iconst.i64 12 - [-,%r13] v4 = iconst.i64 13 - - ; asm: cvtsi2sdl %r11d, %xmm5 - [-,%xmm5] v10 = fcvt_from_sint.f64 v0 ; bin: f2 41 0f 2a eb - ; asm: cvtsi2sdl %esi, %xmm10 - [-,%xmm10] v11 = fcvt_from_sint.f64 v1 ; bin: f2 44 0f 2a d6 - - ; asm: cvtsi2sdq %rax, %xmm5 - [-,%xmm5] v12 = fcvt_from_sint.f64 v2 ; bin: f2 48 0f 2a e8 - ; asm: cvtsi2sdq %r14, %xmm10 - [-,%xmm10] v13 = fcvt_from_sint.f64 v3 ; bin: f2 4d 0f 2a d6 - - ; asm: cvtsd2ss %xmm10, %xmm5 - [-,%xmm5] v14 = fdemote.f32 
v11 ; bin: f2 41 0f 5a ea - ; asm: cvtsd2ss %xmm5, %xmm10 - [-,%xmm10] v15 = fdemote.f32 v10 ; bin: f2 44 0f 5a d5 - - ; asm: movq %rax, %xmm5 - [-,%xmm5] v16 = bitcast.f64 v2 ; bin: 66 48 0f 6e e8 - ; asm: movq %r14, %xmm10 - [-,%xmm10] v17 = bitcast.f64 v3 ; bin: 66 4d 0f 6e d6 - - ; asm: movq %xmm5, %rcx - [-,%rcx] v18 = bitcast.i64 v10 ; bin: 66 48 0f 7e e9 - ; asm: movq %xmm10, %rsi - [-,%rsi] v19 = bitcast.i64 v11 ; bin: 66 4c 0f 7e d6 - - ; Binary arithmetic. - - ; asm: addsd %xmm10, %xmm5 - [-,%xmm5] v20 = fadd v10, v11 ; bin: f2 41 0f 58 ea - ; asm: addsd %xmm5, %xmm10 - [-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5 - - ; asm: subsd %xmm10, %xmm5 - [-,%xmm5] v22 = fsub v10, v11 ; bin: f2 41 0f 5c ea - ; asm: subsd %xmm5, %xmm10 - [-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5 - - ; asm: mulsd %xmm10, %xmm5 - [-,%xmm5] v24 = fmul v10, v11 ; bin: f2 41 0f 59 ea - ; asm: mulsd %xmm5, %xmm10 - [-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5 - - ; asm: divsd %xmm10, %xmm5 - [-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 41 0f 5e ea - ; asm: divsd %xmm5, %xmm10 - [-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5 - - ; Bitwise ops. - ; We use the *ps SSE instructions for everything because they are smaller. 
- - ; asm: andps %xmm10, %xmm5 - [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea - ; asm: andps %xmm5, %xmm10 - [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 - - ; asm: andnps %xmm10, %xmm5 - [-,%xmm5] v32 = band_not v11, v10 ; bin: 41 0f 55 ea - ; asm: andnps %xmm5, %xmm10 - [-,%xmm10] v33 = band_not v10, v11 ; bin: 44 0f 55 d5 - - ; asm: orps %xmm10, %xmm5 - [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea - ; asm: orps %xmm5, %xmm10 - [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 - - ; asm: xorps %xmm10, %xmm5 - [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea - ; asm: xorps %xmm5, %xmm10 - [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 - - ; asm: movaps %xmm10, %xmm5 - [-,%xmm5] v38 = copy v11 ; bin: 41 0f 28 ea - ; asm: movaps %xmm5, %xmm10 - [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 - - ; Convert float to int. - - ; asm: cvttsd2si %xmm5, %ecx - [-,%rcx] v40 = x86_cvtt2si.i32 v10 ; bin: f2 0f 2c cd - ; asm: cvttsd2si %xmm10, %esi - [-,%rsi] v41 = x86_cvtt2si.i32 v11 ; bin: f2 41 0f 2c f2 - - ; asm: cvttsd2si %xmm5, %rcx - [-,%rcx] v42 = x86_cvtt2si.i64 v10 ; bin: f2 48 0f 2c cd - ; asm: cvttsd2si %xmm10, %rsi - [-,%rsi] v43 = x86_cvtt2si.i64 v11 ; bin: f2 49 0f 2c f2 - - ; Min/max. - - ; asm: minsd %xmm10, %xmm5 - [-,%xmm5] v44 = x86_fmin v10, v11 ; bin: f2 41 0f 5d ea - ; asm: minsd %xmm5, %xmm10 - [-,%xmm10] v45 = x86_fmin v11, v10 ; bin: f2 44 0f 5d d5 - ; asm: maxsd %xmm10, %xmm5 - [-,%xmm5] v46 = x86_fmax v10, v11 ; bin: f2 41 0f 5f ea - ; asm: maxsd %xmm5, %xmm10 - [-,%xmm10] v47 = x86_fmax v11, v10 ; bin: f2 44 0f 5f d5 - - ; Unary arithmetic. 
- - ; asm: sqrtsd %xmm5, %xmm10 - [-,%xmm10] v50 = sqrt v10 ; bin: f2 44 0f 51 d5 - ; asm: sqrtsd %xmm10, %xmm5 - [-,%xmm5] v51 = sqrt v11 ; bin: f2 41 0f 51 ea - - ; asm: roundsd $0, %xmm5, %xmm10 - [-,%xmm10] v52 = nearest v10 ; bin: 66 44 0f 3a 0b d5 00 - ; asm: roundsd $0, %xmm10, %xmm5 - [-,%xmm5] v53 = nearest v11 ; bin: 66 41 0f 3a 0b ea 00 - ; asm: roundsd $0, %xmm5, %xmm2 - [-,%xmm2] v54 = nearest v10 ; bin: 66 0f 3a 0b d5 00 - - ; asm: roundsd $1, %xmm5, %xmm10 - [-,%xmm10] v55 = floor v10 ; bin: 66 44 0f 3a 0b d5 01 - ; asm: roundsd $1, %xmm10, %xmm5 - [-,%xmm5] v56 = floor v11 ; bin: 66 41 0f 3a 0b ea 01 - ; asm: roundsd $1, %xmm5, %xmm2 - [-,%xmm2] v57 = floor v10 ; bin: 66 0f 3a 0b d5 01 - - ; asm: roundsd $2, %xmm5, %xmm10 - [-,%xmm10] v58 = ceil v10 ; bin: 66 44 0f 3a 0b d5 02 - ; asm: roundsd $2, %xmm10, %xmm5 - [-,%xmm5] v59 = ceil v11 ; bin: 66 41 0f 3a 0b ea 02 - ; asm: roundsd $2, %xmm5, %xmm2 - [-,%xmm2] v60 = ceil v10 ; bin: 66 0f 3a 0b d5 02 - - ; asm: roundsd $3, %xmm5, %xmm10 - [-,%xmm10] v61 = trunc v10 ; bin: 66 44 0f 3a 0b d5 03 - ; asm: roundsd $3, %xmm10, %xmm5 - [-,%xmm5] v62 = trunc v11 ; bin: 66 41 0f 3a 0b ea 03 - ; asm: roundsd $3, %xmm5, %xmm2 - [-,%xmm2] v63 = trunc v10 ; bin: 66 0f 3a 0b d5 03 - - ; Load/Store - - ; asm: movsd (%r14), %xmm5 - [-,%xmm5] v100 = load.f64 v3 ; bin: heap_oob f2 41 0f 10 2e - ; asm: movsd (%rax), %xmm10 - [-,%xmm10] v101 = load.f64 v2 ; bin: heap_oob f2 44 0f 10 10 - ; asm: movsd 50(%r14), %xmm5 - [-,%xmm5] v110 = load.f64 v3+50 ; bin: heap_oob f2 41 0f 10 6e 32 - ; asm: movsd -50(%rax), %xmm10 - [-,%xmm10] v111 = load.f64 v2-50 ; bin: heap_oob f2 44 0f 10 50 ce - ; asm: movsd 10000(%r14), %xmm5 - [-,%xmm5] v120 = load.f64 v3+10000 ; bin: heap_oob f2 41 0f 10 ae 00002710 - ; asm: movsd -10000(%rax), %xmm10 - [-,%xmm10] v121 = load.f64 v2-10000 ; bin: heap_oob f2 44 0f 10 90 ffffd8f0 - - ; asm: movsd %xmm5, (%r14) - [-] store.f64 v100, v3 ; bin: heap_oob f2 41 0f 11 2e - ; asm: movsd %xmm10, (%rax) - 
[-] store.f64 v101, v2 ; bin: heap_oob f2 44 0f 11 10 - ; asm: movsd %xmm5, (%r13) - [-] store.f64 v100, v4 ; bin: heap_oob f2 41 0f 11 6d 00 - ; asm: movsd %xmm10, (%r13) - [-] store.f64 v101, v4 ; bin: heap_oob f2 45 0f 11 55 00 - ; asm: movsd %xmm5, 50(%r14) - [-] store.f64 v100, v3+50 ; bin: heap_oob f2 41 0f 11 6e 32 - ; asm: movsd %xmm10, -50(%rax) - [-] store.f64 v101, v2-50 ; bin: heap_oob f2 44 0f 11 50 ce - ; asm: movsd %xmm5, 10000(%r14) - [-] store.f64 v100, v3+10000 ; bin: heap_oob f2 41 0f 11 ae 00002710 - ; asm: movsd %xmm10, -10000(%rax) - [-] store.f64 v101, v2-10000 ; bin: heap_oob f2 44 0f 11 90 ffffd8f0 - - ; Spill / Fill. - - ; asm: movsd %xmm5, 1032(%rsp) - [-,ss1] v200 = spill v100 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd %xmm10, 1032(%rsp) - [-,ss1] v201 = spill v101 ; bin: stk_ovf f2 44 0f 11 94 24 00000408 - - ; asm: movsd 1032(%rsp), %xmm5 - [-,%xmm5] v210 = fill v200 ; bin: f2 0f 10 ac 24 00000408 - ; asm: movsd 1032(%rsp), %xmm10 - [-,%xmm10] v211 = fill v201 ; bin: f2 44 0f 10 94 24 00000408 - - ; asm: movsd %xmm5, 1032(%rsp) - regspill v100, %xmm5 -> ss1 ; bin: stk_ovf f2 0f 11 ac 24 00000408 - ; asm: movsd 1032(%rsp), %xmm5 - regfill v100, ss1 -> %xmm5 ; bin: f2 0f 10 ac 24 00000408 - - ; Comparisons. - ; - ; Only `supported_floatccs` are tested here. Others are handled by - ; legalization patterns. 
- - ; asm: ucomisd %xmm10, %xmm5 - ; asm: setnp %bl - [-,%rbx] v300 = fcmp ord v10, v11 ; bin: 66 41 0f 2e ea 0f 9b c3 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: setp %bl - [-,%rbx] v301 = fcmp uno v11, v10 ; bin: 66 44 0f 2e d5 0f 9a c3 - ; asm: ucomisd %xmm10, %xmm5 - ; asm: setne %dl - [-,%rdx] v302 = fcmp one v10, v11 ; bin: 66 41 0f 2e ea 0f 95 c2 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: sete %dl - [-,%rdx] v303 = fcmp ueq v11, v10 ; bin: 66 44 0f 2e d5 0f 94 c2 - ; asm: ucomisd %xmm10, %xmm5 - ; asm: seta %bl - [-,%rbx] v304 = fcmp gt v10, v11 ; bin: 66 41 0f 2e ea 0f 97 c3 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: setae %bl - [-,%rbx] v305 = fcmp ge v11, v10 ; bin: 66 44 0f 2e d5 0f 93 c3 - ; asm: ucomisd %xmm10, %xmm5 - ; asm: setb %dl - [-,%rdx] v306 = fcmp ult v10, v11 ; bin: 66 41 0f 2e ea 0f 92 c2 - ; asm: ucomisd %xmm5, %xmm10 - ; asm: setbe %dl - [-,%rdx] v307 = fcmp ule v11, v10 ; bin: 66 44 0f 2e d5 0f 96 c2 - - ; asm: ucomisd %xmm10, %xmm5 - [-,%rflags] v310 = ffcmp v10, v11 ; bin: 66 41 0f 2e ea - ; asm: ucomisd %xmm10, %xmm5 - [-,%rflags] v311 = ffcmp v11, v10 ; bin: 66 44 0f 2e d5 - ; asm: ucomisd %xmm5, %xmm5 - [-,%rflags] v312 = ffcmp v10, v10 ; bin: 66 0f 2e ed - - ; Load/Store Complex - - [-,%rax] v350 = iconst.i64 1 - [-,%rbx] v351 = iconst.i64 2 - ; asm: movsd (%rax,%rbx,1),%xmm5 - [-,%xmm5] v352 = load_complex.f64 v350+v351 ; bin: heap_oob f2 0f 10 2c 18 - ; asm: movsd 0x32(%rax,%rbx,1),%xmm5 - [-,%xmm5] v353 = load_complex.f64 v350+v351+50 ; bin: heap_oob f2 0f 10 6c 18 32 - ; asm: movsd -0x32(%rax,%rbx,1),%xmm10 - [-,%xmm10] v354 = load_complex.f64 v350+v351-50 ; bin: heap_oob f2 44 0f 10 54 18 ce - ; asm: movsd 0x2710(%rax,%rbx,1),%xmm5 - [-,%xmm5] v355 = load_complex.f64 v350+v351+10000 ; bin: heap_oob f2 0f 10 ac 18 00002710 - ; asm: movsd -0x2710(%rax,%rbx,1),%xmm10 - [-,%xmm10] v356 = load_complex.f64 v350+v351-10000 ; bin: heap_oob f2 44 0f 10 94 18 ffffd8f0 - - ; asm: movsd %xmm5, (%rax,%rbx,1) - [-] store_complex.f64 v100, v350+v351 ; 
bin: heap_oob f2 0f 11 2c 18 - ; asm: movsd %xmm5, 50(%rax,%rbx,1) - [-] store_complex.f64 v100, v350+v351+50 ; bin: heap_oob f2 0f 11 6c 18 32 - ; asm: movsd %xmm10, -50(%rax,%rbx,1) - [-] store_complex.f64 v101, v350+v351-50 ; bin: heap_oob f2 44 0f 11 54 18 ce - ; asm: movsd %xmm5, 10000(%rax,%rbx,1) - [-] store_complex.f64 v100, v350+v351+10000 ; bin: heap_oob f2 0f 11 ac 18 00002710 - ; asm: movsd %xmm10, -10000(%rax,%rbx,1) - [-] store_complex.f64 v101, v350+v351-10000 ; bin: heap_oob f2 44 0f 11 94 18 ffffd8f0 - - return -} - -function %cpuflags_float(f32 [%xmm0]) { -block0(v0: f32 [%xmm0]): - ; asm: ucomiss %xmm0, %xmm0 - [-,%rflags] v1 = ffcmp v0, v0 ; bin: 0f 2e c0 - - jump block1 - -block1: - ; asm: jnp block1 - brff ord v1, block1 ; bin: 7b fe - jump block2 - -block2: - ; asm: jp block1 - brff uno v1, block1 ; bin: 7a fc - jump block3 - -block3: - ; asm: jne block1 - brff one v1, block1 ; bin: 75 fa - jump block4 - -block4: - ; asm: je block1 - brff ueq v1, block1 ; bin: 74 f8 - jump block5 - -block5: - ; asm: ja block1 - brff gt v1, block1 ; bin: 77 f6 - jump block6 - -block6: - ; asm: jae block1 - brff ge v1, block1 ; bin: 73 f4 - jump block7 - -block7: - ; asm: jb block1 - brff ult v1, block1 ; bin: 72 f2 - jump block8 - -block8: - ; asm: jbe block1 - brff ule v1, block1 ; bin: 76 f0 - jump block9 - -block9: - ; asm: jp .+4; ud2 - trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b - ; asm: jnp .+4; ud2 - trapff uno v1, user0 ; bin: 7b 02 user0 0f 0b - ; asm: je .+4; ud2 - trapff one v1, user0 ; bin: 74 02 user0 0f 0b - ; asm: jne .+4; ud2 - trapff ueq v1, user0 ; bin: 75 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapff gt v1, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapff ge v1, user0 ; bin: 72 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapff ult v1, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapff ule v1, user0 ; bin: 77 02 user0 0f 0b - - ; asm: setnp %bl - [-,%rbx] v10 = trueff ord v1 ; bin: 0f 9b c3 - ; asm: setp %bl - [-,%rbx] 
v11 = trueff uno v1 ; bin: 0f 9a c3 - ; asm: setne %dl - [-,%rdx] v12 = trueff one v1 ; bin: 0f 95 c2 - ; asm: sete %dl - [-,%rdx] v13 = trueff ueq v1 ; bin: 0f 94 c2 - ; asm: seta %r10b - [-,%r10] v14 = trueff gt v1 ; bin: 41 0f 97 c2 - ; asm: setae %r10b - [-,%r10] v15 = trueff ge v1 ; bin: 41 0f 93 c2 - ; asm: setb %r14b - [-,%r14] v16 = trueff ult v1 ; bin: 41 0f 92 c6 - ; asm: setbe %r14b - [-,%r14] v17 = trueff ule v1 ; bin: 41 0f 96 c6 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif b/cranelift/filetests/filetests/isa/x86/binary64-pic.clif deleted file mode 100644 index 4f2c650592..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif +++ /dev/null @@ -1,83 +0,0 @@ -; binary emission of 64-bit code. -test binemit -set opt_level=speed_and_size -set is_pic -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-pic.clif | llvm-mc -show-encoding -triple=x86_64 -; - -; Tests for i64 instructions. -function %I64() { - sig0 = () - fn0 = %foo() - fn1 = colocated %bar() - - gv0 = symbol %some_gv - gv1 = symbol colocated %some_gv - - ; Use incoming_arg stack slots because they won't be relocated by the frame - ; layout. - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - - ; Colocated functions. 
- - ; asm: call foo - call fn1() ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000 - - ; asm: lea 0x0(%rip), %rax - [-,%rax] v0 = func_addr.i64 fn1 ; bin: 48 8d 05 PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %rsi - [-,%rsi] v1 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %r10 - [-,%r10] v2 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 - - ; asm: call *%rax - call_indirect sig0, v0() ; bin: stk_ovf ff d0 - ; asm: call *%rsi - call_indirect sig0, v1() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v2() ; bin: stk_ovf 41 ff d2 - - ; Non-colocated functions. - - ; asm: call foo@PLT - call fn0() ; bin: stk_ovf e8 CallPLTRel4(%foo-4) 00000000 - - ; asm: mov 0x0(%rip), %rax - [-,%rax] v100 = func_addr.i64 fn0 ; bin: 48 8b 05 GOTPCRel4(%foo-4) 00000000 - ; asm: mov 0x0(%rip), %rsi - [-,%rsi] v101 = func_addr.i64 fn0 ; bin: 48 8b 35 GOTPCRel4(%foo-4) 00000000 - ; asm: mov 0x0(%rip), %r10 - [-,%r10] v102 = func_addr.i64 fn0 ; bin: 4c 8b 15 GOTPCRel4(%foo-4) 00000000 - - ; asm: call *%rax - call_indirect sig0, v100() ; bin: stk_ovf ff d0 - ; asm: call *%rsi - call_indirect sig0, v101() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v102() ; bin: stk_ovf 41 ff d2 - - ; asm: mov 0x0(%rip), %rcx - [-,%rcx] v3 = symbol_value.i64 gv0 ; bin: 48 8b 0d GOTPCRel4(%some_gv-4) 00000000 - ; asm: mov 0x0(%rip), %rsi - [-,%rsi] v4 = symbol_value.i64 gv0 ; bin: 48 8b 35 GOTPCRel4(%some_gv-4) 00000000 - ; asm: mov 0x0(%rip), %r10 - [-,%r10] v5 = symbol_value.i64 gv0 ; bin: 4c 8b 15 GOTPCRel4(%some_gv-4) 00000000 - - ; asm: lea 0x0(%rip), %rcx - [-,%rcx] v6 = symbol_value.i64 gv1 ; bin: 48 8d 0d PCRel4(%some_gv-4) 00000000 - ; asm: lea 0x0(%rip), %rsi - [-,%rsi] v7 = symbol_value.i64 gv1 ; bin: 48 8d 35 PCRel4(%some_gv-4) 00000000 - ; asm: lea 0x0(%rip), %r10 - [-,%r10] v8 = symbol_value.i64 gv1 ; bin: 4c 8d 15 PCRel4(%some_gv-4) 00000000 - - return -} diff --git 
a/cranelift/filetests/filetests/isa/x86/binary64.clif b/cranelift/filetests/filetests/isa/x86/binary64.clif deleted file mode 100644 index c5e1cf5099..0000000000 --- a/cranelift/filetests/filetests/isa/x86/binary64.clif +++ /dev/null @@ -1,1692 +0,0 @@ -; binary emission of x86-64 code. -test binemit -set opt_level=speed_and_size -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64.clif | llvm-mc -show-encoding -triple=x86_64 -; - -; Tests for i64 instructions. -function %I64() { - sig0 = () - fn0 = %foo() - fn1 = colocated %bar() - - gv0 = symbol %some_gv - - ; Use incoming_arg stack slots because they won't be relocated by the frame - ; layout. - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - - ; Integer Constants. - - ; asm: movq $0x01020304f1f2f3f4, %rcx - [-,%rcx] v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4 - ; asm: movq $0x11020304f1f2f3f4, %rsi - [-,%rsi] v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4 - ; asm: movq $0x21020304f1f2f3f4, %r10 - [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 - ; asm: movl $0xff001122, %r8d # 32-bit zero-extended constant. - [-,%r8] v4 = iconst.i64 0xff00_1122 ; bin: 41 b8 ff001122 - ; asm: movq $0xffffffff88001122, %r14 # 32-bit sign-extended constant. - [-,%r14] v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122 - - ; asm: movb $1, %cl - [-,%rcx] v9007 = bconst.b1 true ; bin: b9 00000001 - ; asm: movb $1, %sil - [-,%r10] v9008 = bconst.b1 true ; bin: 41 ba 00000001 - - ; Integer Register Operations. - - ; asm: notq %rcx - [-,%rcx] v4000 = bnot v1 ; bin: 48 f7 d1 - ; asm: notq %rsi - [-,%rsi] v4001 = bnot v2 ; bin: 48 f7 d6 - ; asm: notq %r10 - [-,%r10] v4002 = bnot v3 ; bin: 49 f7 d2 - - ; Integer Register-Register Operations. 
- - ; asm: addq %rsi, %rcx - [-,%rcx] v10 = iadd v1, v2 ; bin: 48 01 f1 - ; asm: addq %r10, %rsi - [-,%rsi] v11 = iadd v2, v3 ; bin: 4c 01 d6 - ; asm: addq %rcx, %r10 - [-,%r10] v12 = iadd v3, v1 ; bin: 49 01 ca - - ; asm: subq %rsi, %rcx - [-,%rcx] v20 = isub v1, v2 ; bin: 48 29 f1 - ; asm: subq %r10, %rsi - [-,%rsi] v21 = isub v2, v3 ; bin: 4c 29 d6 - ; asm: subq %rcx, %r10 - [-,%r10] v22 = isub v3, v1 ; bin: 49 29 ca - - ; asm: andq %rsi, %rcx - [-,%rcx] v30 = band v1, v2 ; bin: 48 21 f1 - ; asm: andq %r10, %rsi - [-,%rsi] v31 = band v2, v3 ; bin: 4c 21 d6 - ; asm: andq %rcx, %r10 - [-,%r10] v32 = band v3, v1 ; bin: 49 21 ca - - ; asm: orq %rsi, %rcx - [-,%rcx] v40 = bor v1, v2 ; bin: 48 09 f1 - ; asm: orq %r10, %rsi - [-,%rsi] v41 = bor v2, v3 ; bin: 4c 09 d6 - ; asm: orq %rcx, %r10 - [-,%r10] v42 = bor v3, v1 ; bin: 49 09 ca - - ; asm: xorq %rsi, %rcx - [-,%rcx] v50 = bxor v1, v2 ; bin: 48 31 f1 - ; asm: xorq %r10, %rsi - [-,%rsi] v51 = bxor v2, v3 ; bin: 4c 31 d6 - ; asm: xorq %rcx, %r10 - [-,%r10] v52 = bxor v3, v1 ; bin: 49 31 ca - - ; asm: shlq %cl, %rsi - [-,%rsi] v60 = ishl v2, v1 ; bin: 48 d3 e6 - ; asm: shlq %cl, %r10 - [-,%r10] v61 = ishl v3, v1 ; bin: 49 d3 e2 - ; asm: sarq %cl, %rsi - [-,%rsi] v62 = sshr v2, v1 ; bin: 48 d3 fe - ; asm: sarq %cl, %r10 - [-,%r10] v63 = sshr v3, v1 ; bin: 49 d3 fa - ; asm: shrq %cl, %rsi - [-,%rsi] v64 = ushr v2, v1 ; bin: 48 d3 ee - ; asm: shrq %cl, %r10 - [-,%r10] v65 = ushr v3, v1 ; bin: 49 d3 ea - - ; asm: rolq %cl, %rsi - [-,%rsi] v66 = rotl v2, v1 ; bin: 48 d3 c6 - ; asm: rolq %cl, %r10 - [-,%r10] v67 = rotl v3, v1 ; bin: 49 d3 c2 - ; asm: rorq %cl, %rsi - [-,%rsi] v68 = rotr v2, v1 ; bin: 48 d3 ce - ; asm: rorq %cl, %r10 - [-,%r10] v69 = rotr v3, v1 ; bin: 49 d3 ca - - ; Integer Register-Immediate Operations. - ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. - ; Some take 8-bit immediates that are sign-extended to 64 bits. 
- - ; asm: addq $-100000, %rcx - [-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 48 81 c1 fffe7960 - ; asm: addq $100000, %rsi - [-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 48 81 c6 000186a0 - ; asm: addq $0x7fffffff, %r10 - [-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff - ; asm: addq $100, %r8 - [-,%r8] v73 = iadd_imm v4, 100 ; bin: 49 83 c0 64 - ; asm: addq $-100, %r14 - [-,%r14] v74 = iadd_imm v5, -100 ; bin: 49 83 c6 9c - - ; asm: andq $-100000, %rcx - [-,%rcx] v80 = band_imm v1, -100000 ; bin: 48 81 e1 fffe7960 - ; asm: andq $100000, %rsi - [-,%rsi] v81 = band_imm v2, 100000 ; bin: 48 81 e6 000186a0 - ; asm: andq $0x7fffffff, %r10 - [-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff - ; asm: andq $100, %r8 - [-,%r8] v83 = band_imm v4, 100 ; bin: 49 83 e0 64 - ; asm: andq $-100, %r14 - [-,%r14] v84 = band_imm v5, -100 ; bin: 49 83 e6 9c - - ; asm: orq $-100000, %rcx - [-,%rcx] v90 = bor_imm v1, -100000 ; bin: 48 81 c9 fffe7960 - ; asm: orq $100000, %rsi - [-,%rsi] v91 = bor_imm v2, 100000 ; bin: 48 81 ce 000186a0 - ; asm: orq $0x7fffffff, %r10 - [-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 49 81 ca 7fffffff - ; asm: orq $100, %r8 - [-,%r8] v93 = bor_imm v4, 100 ; bin: 49 83 c8 64 - ; asm: orq $-100, %r14 - [-,%r14] v94 = bor_imm v5, -100 ; bin: 49 83 ce 9c - ; asm: ret - - ; asm: xorq $-100000, %rcx - [-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 48 81 f1 fffe7960 - ; asm: xorq $100000, %rsi - [-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 48 81 f6 000186a0 - ; asm: xorq $0x7fffffff, %r10 - [-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff - ; asm: xorq $100, %r8 - [-,%r8] v103 = bxor_imm v4, 100 ; bin: 49 83 f0 64 - ; asm: xorq $-100, %r14 - [-,%r14] v104 = bxor_imm v5, -100 ; bin: 49 83 f6 9c - - ; Register copies. 
- - ; asm: movq %rsi, %rcx - [-,%rcx] v110 = copy v2 ; bin: 48 89 f1 - ; asm: movq %r10, %rsi - [-,%rsi] v111 = copy v3 ; bin: 4c 89 d6 - ; asm: movq %rcx, %r10 - [-,%r10] v112 = copy v1 ; bin: 49 89 ca - - ; Copy Special - ; asm: movq %rsp, %rbp - copy_special %rsp -> %rbp ; bin: 48 89 e5 - ; asm: movq %r10, %r11 - copy_special %r10 -> %r11 ; bin: 4d 89 d3 - ; asm: movq %rsp, %r11 - copy_special %rsp -> %r11 ; bin: 49 89 e3 - ; asm: movq %r10, %rsp - copy_special %r10 -> %rsp ; bin: 4c 89 d4 - - ; Copy to SSA - - ; asm: movq %rax, %r15 - [-,%r15] v700 = copy_to_ssa.i64 %rax ; bin: 49 89 c7 - ; asm: movq %r15, %rax - [-,%rax] v701 = copy_to_ssa.i64 %r15 ; bin: 4c 89 f8 - ; asm: movq %rdi, %rsi - [-,%rsi] v702 = copy_to_ssa.i64 %rdi ; bin: 48 89 fe - ; asm: movq %r11, %r14 - [-,%r14] v703 = copy_to_ssa.i64 %r11 ; bin: 4d 89 de - - ; asm: movl %eax, %r15d - [-,%r15] v704 = copy_to_ssa.i32 %rax ; bin: 41 89 c7 - ; asm: movl %r15d, %eax - [-,%rax] v705 = copy_to_ssa.i32 %r15 ; bin: 44 89 f8 - ; asm: movl %edi, %esi. Unfortunately we get a redundant REX prefix. - [-,%rsi] v706 = copy_to_ssa.i32 %rdi ; bin: 40 89 fe - ; asm: movl %r11, %r14 - [-,%r14] v707 = copy_to_ssa.i32 %r11 ; bin: 45 89 de - - ; Load/Store instructions. - - ; Register indirect addressing with no displacement. 
- - ; asm: movq %rcx, (%r10) - store v1, v3 ; bin: heap_oob 49 89 0a - ; asm: movq %r10, (%rcx) - store v3, v1 ; bin: heap_oob 4c 89 11 - ; asm: movl %ecx, (%r10) - istore32 v1, v3 ; bin: heap_oob 41 89 0a - ; asm: movl %r10d, (%rcx) - istore32 v3, v1 ; bin: heap_oob 44 89 11 - ; asm: movw %cx, (%r10) - istore16 v1, v3 ; bin: heap_oob 66 41 89 0a - ; asm: movw %r10w, (%rcx) - istore16 v3, v1 ; bin: heap_oob 66 44 89 11 - ; asm: movb %cl, (%r10) - istore8 v1, v3 ; bin: heap_oob 41 88 0a - ; asm: movb %r10b, (%rcx) - istore8 v3, v1 ; bin: heap_oob 44 88 11 - - ; asm: movq (%rcx), %r14 - [-,%r14] v120 = load.i64 v1 ; bin: heap_oob 4c 8b 31 - ; asm: movq (%r10), %rdx - [-,%rdx] v121 = load.i64 v3 ; bin: heap_oob 49 8b 12 - ; asm: movl (%rcx), %r14d - [-,%r14] v122 = uload32.i64 v1 ; bin: heap_oob 44 8b 31 - ; asm: movl (%r10), %edx - [-,%rdx] v123 = uload32.i64 v3 ; bin: heap_oob 41 8b 12 - ; asm: movslq (%rcx), %r14 - [-,%r14] v124 = sload32.i64 v1 ; bin: heap_oob 4c 63 31 - ; asm: movslq (%r10), %rdx - [-,%rdx] v125 = sload32.i64 v3 ; bin: heap_oob 49 63 12 - ; asm: movzwq (%rcx), %r14 - [-,%r14] v126 = uload16.i64 v1 ; bin: heap_oob 4c 0f b7 31 - ; asm: movzwq (%r10), %rdx - [-,%rdx] v127 = uload16.i64 v3 ; bin: heap_oob 49 0f b7 12 - ; asm: movswq (%rcx), %r14 - [-,%r14] v128 = sload16.i64 v1 ; bin: heap_oob 4c 0f bf 31 - ; asm: movswq (%r10), %rdx - [-,%rdx] v129 = sload16.i64 v3 ; bin: heap_oob 49 0f bf 12 - ; asm: movzbq (%rcx), %r14 - [-,%r14] v130 = uload8.i64 v1 ; bin: heap_oob 4c 0f b6 31 - ; asm: movzbq (%r10), %rdx - [-,%rdx] v131 = uload8.i64 v3 ; bin: heap_oob 49 0f b6 12 - ; asm: movsbq (%rcx), %r14 - [-,%r14] v132 = sload8.i64 v1 ; bin: heap_oob 4c 0f be 31 - ; asm: movsbq (%r10), %rdx - [-,%rdx] v133 = sload8.i64 v3 ; bin: heap_oob 49 0f be 12 - - ; Register-indirect with 8-bit signed displacement. 
- - ; asm: movq %rcx, 100(%r10) - store v1, v3+100 ; bin: heap_oob 49 89 4a 64 - ; asm: movq %r10, -100(%rcx) - store v3, v1-100 ; bin: heap_oob 4c 89 51 9c - ; asm: movl %ecx, 100(%r10) - istore32 v1, v3+100 ; bin: heap_oob 41 89 4a 64 - ; asm: movl %r10d, -100(%rcx) - istore32 v3, v1-100 ; bin: heap_oob 44 89 51 9c - ; asm: movw %cx, 100(%r10) - istore16 v1, v3+100 ; bin: heap_oob 66 41 89 4a 64 - ; asm: movw %r10w, -100(%rcx) - istore16 v3, v1-100 ; bin: heap_oob 66 44 89 51 9c - ; asm: movb %cl, 100(%r10) - istore8 v1, v3+100 ; bin: heap_oob 41 88 4a 64 - ; asm: movb %r10b, 100(%rcx) - istore8 v3, v1+100 ; bin: heap_oob 44 88 51 64 - - ; asm: movq 50(%rcx), %r10 - [-,%r10] v140 = load.i64 v1+50 ; bin: heap_oob 4c 8b 51 32 - ; asm: movq -50(%r10), %rdx - [-,%rdx] v141 = load.i64 v3-50 ; bin: heap_oob 49 8b 52 ce - ; asm: movl 50(%rcx), %edi - [-,%rdi] v142 = uload32.i64 v1+50 ; bin: heap_oob 8b 79 32 - ; asm: movl -50(%rsi), %edx - [-,%rdx] v143 = uload32.i64 v2-50 ; bin: heap_oob 8b 56 ce - ; asm: movslq 50(%rcx), %rdi - [-,%rdi] v144 = sload32.i64 v1+50 ; bin: heap_oob 48 63 79 32 - ; asm: movslq -50(%rsi), %rdx - [-,%rdx] v145 = sload32.i64 v2-50 ; bin: heap_oob 48 63 56 ce - ; asm: movzwq 50(%rcx), %rdi - [-,%rdi] v146 = uload16.i64 v1+50 ; bin: heap_oob 48 0f b7 79 32 - ; asm: movzwq -50(%rsi), %rdx - [-,%rdx] v147 = uload16.i64 v2-50 ; bin: heap_oob 48 0f b7 56 ce - ; asm: movswq 50(%rcx), %rdi - [-,%rdi] v148 = sload16.i64 v1+50 ; bin: heap_oob 48 0f bf 79 32 - ; asm: movswq -50(%rsi), %rdx - [-,%rdx] v149 = sload16.i64 v2-50 ; bin: heap_oob 48 0f bf 56 ce - ; asm: movzbq 50(%rcx), %rdi - [-,%rdi] v150 = uload8.i64 v1+50 ; bin: heap_oob 48 0f b6 79 32 - ; asm: movzbq -50(%rsi), %rdx - [-,%rdx] v151 = uload8.i64 v2-50 ; bin: heap_oob 48 0f b6 56 ce - ; asm: movsbq 50(%rcx), %rdi - [-,%rdi] v152 = sload8.i64 v1+50 ; bin: heap_oob 48 0f be 79 32 - ; asm: movsbq -50(%rsi), %rdx - [-,%rdx] v153 = sload8.i64 v2-50 ; bin: heap_oob 48 0f be 56 ce - - ; 
Register-indirect with 32-bit signed displacement. - - ; asm: movq %rcx, 10000(%r10) - store v1, v3+10000 ; bin: heap_oob 49 89 8a 00002710 - ; asm: movq %r10, -10000(%rcx) - store v3, v1-10000 ; bin: heap_oob 4c 89 91 ffffd8f0 - ; asm: movl %ecx, 10000(%rsi) - istore32 v1, v2+10000 ; bin: heap_oob 89 8e 00002710 - ; asm: movl %esi, -10000(%rcx) - istore32 v2, v1-10000 ; bin: heap_oob 89 b1 ffffd8f0 - ; asm: movw %cx, 10000(%rsi) - istore16 v1, v2+10000 ; bin: heap_oob 66 89 8e 00002710 - ; asm: movw %si, -10000(%rcx) - istore16 v2, v1-10000 ; bin: heap_oob 66 89 b1 ffffd8f0 - ; asm: movb %cl, 10000(%rsi) - istore8 v1, v2+10000 ; bin: heap_oob 88 8e 00002710 - ; asm: movb %sil, 10000(%rcx) - istore8 v2, v1+10000 ; bin: heap_oob 40 88 b1 00002710 - - ; asm: movq 50000(%rcx), %r10 - [-,%r10] v160 = load.i64 v1+50000 ; bin: heap_oob 4c 8b 91 0000c350 - ; asm: movq -50000(%r10), %rdx - [-,%rdx] v161 = load.i64 v3-50000 ; bin: heap_oob 49 8b 92 ffff3cb0 - ; asm: movl 50000(%rcx), %edi - [-,%rdi] v162 = uload32.i64 v1+50000 ; bin: heap_oob 8b b9 0000c350 - ; asm: movl -50000(%rsi), %edx - [-,%rdx] v163 = uload32.i64 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 - ; asm: movslq 50000(%rcx), %rdi - [-,%rdi] v164 = sload32.i64 v1+50000 ; bin: heap_oob 48 63 b9 0000c350 - ; asm: movslq -50000(%rsi), %rdx - [-,%rdx] v165 = sload32.i64 v2-50000 ; bin: heap_oob 48 63 96 ffff3cb0 - ; asm: movzwq 50000(%rcx), %rdi - [-,%rdi] v166 = uload16.i64 v1+50000 ; bin: heap_oob 48 0f b7 b9 0000c350 - ; asm: movzwq -50000(%rsi), %rdx - [-,%rdx] v167 = uload16.i64 v2-50000 ; bin: heap_oob 48 0f b7 96 ffff3cb0 - ; asm: movswq 50000(%rcx), %rdi - [-,%rdi] v168 = sload16.i64 v1+50000 ; bin: heap_oob 48 0f bf b9 0000c350 - ; asm: movswq -50000(%rsi), %rdx - [-,%rdx] v169 = sload16.i64 v2-50000 ; bin: heap_oob 48 0f bf 96 ffff3cb0 - ; asm: movzbq 50000(%rcx), %rdi - [-,%rdi] v170 = uload8.i64 v1+50000 ; bin: heap_oob 48 0f b6 b9 0000c350 - ; asm: movzbq -50000(%rsi), %rdx - [-,%rdx] v171 = uload8.i64 
v2-50000 ; bin: heap_oob 48 0f b6 96 ffff3cb0 - ; asm: movsbq 50000(%rcx), %rdi - [-,%rdi] v172 = sload8.i64 v1+50000 ; bin: heap_oob 48 0f be b9 0000c350 - ; asm: movsbq -50000(%rsi), %rdx - [-,%rdx] v173 = sload8.i64 v2-50000 ; bin: heap_oob 48 0f be 96 ffff3cb0 - - - ; More arithmetic. - - ; asm: imulq %rsi, %rcx - [-,%rcx] v180 = imul v1, v2 ; bin: 48 0f af ce - ; asm: imulq %r10, %rsi - [-,%rsi] v181 = imul v2, v3 ; bin: 49 0f af f2 - ; asm: imulq %rcx, %r10 - [-,%r10] v182 = imul v3, v1 ; bin: 4c 0f af d1 - - [-,%rax] v190 = iconst.i64 1 - [-,%rdx] v191 = iconst.i64 2 - ; asm: idivq %rcx - [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1 ; bin: int_divz 48 f7 f9 - ; asm: idivq %rsi - [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2 ; bin: int_divz 48 f7 fe - ; asm: idivq %r10 - [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3 ; bin: int_divz 49 f7 fa - ; asm: divq %rcx - [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1 ; bin: int_divz 48 f7 f1 - ; asm: divq %rsi - [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2 ; bin: int_divz 48 f7 f6 - ; asm: divq %r10 - [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3 ; bin: int_divz 49 f7 f2 - - ; double-length multiply instructions, 64 bit - [-,%rax] v1001 = iconst.i64 1 - [-,%r15] v1002 = iconst.i64 2 - ; asm: mulq %r15 - [-,%rax,%rdx] v1003, v1004 = x86_umulx v1001, v1002 ; bin: 49 f7 e7 - ; asm: imulq %r15 - [-,%rax,%rdx] v1005, v1006 = x86_smulx v1001, v1002 ; bin: 49 f7 ef - - ; double-length multiply instructions, 32 bit - [-,%rax] v1011 = iconst.i32 1 - [-,%r15] v1012 = iconst.i32 2 - [-,%rcx] v1017 = iconst.i32 3 - ; asm: mull %r15d - [-,%rax,%rdx] v1013, v1014 = x86_umulx v1011, v1012 ; bin: 41 f7 e7 - ; asm: imull %r15d - [-,%rax,%rdx] v1015, v1016 = x86_smulx v1011, v1012 ; bin: 41 f7 ef - - ; asm: mull %ecx - [-,%rax,%rdx] v1018, v1019 = x86_umulx v1011, v1017 ; bin: f7 e1 - ; asm: imull %ecx - [-,%rax,%rdx] v1020, v1021 = x86_smulx v1011, v1017 ; bin: f7 e9 - - ; 
Bit-counting instructions. - - ; asm: popcntq %rsi, %rcx - [-,%rcx] v210 = popcnt v2 ; bin: f3 48 0f b8 ce - ; asm: popcntq %r10, %rsi - [-,%rsi] v211 = popcnt v3 ; bin: f3 49 0f b8 f2 - ; asm: popcntq %rcx, %r10 - [-,%r10] v212 = popcnt v1 ; bin: f3 4c 0f b8 d1 - - ; asm: lzcntq %rsi, %rcx - [-,%rcx] v213 = clz v2 ; bin: f3 48 0f bd ce - ; asm: lzcntq %r10, %rsi - [-,%rsi] v214 = clz v3 ; bin: f3 49 0f bd f2 - ; asm: lzcntq %rcx, %r10 - [-,%r10] v215 = clz v1 ; bin: f3 4c 0f bd d1 - - ; asm: tzcntq %rsi, %rcx - [-,%rcx] v216 = ctz v2 ; bin: f3 48 0f bc ce - ; asm: tzcntq %r10, %rsi - [-,%rsi] v217 = ctz v3 ; bin: f3 49 0f bc f2 - ; asm: tzcntq %rcx, %r10 - [-,%r10] v218 = ctz v1 ; bin: f3 4c 0f bc d1 - - ; Integer comparisons. - - ; asm: cmpq %rsi, %rcx - ; asm: sete %bl - [-,%rbx] v300 = icmp eq v1, v2 ; bin: 48 39 f1 0f 94 c3 - ; asm: cmpq %r10, %rsi - ; asm: sete %dl - [-,%rdx] v301 = icmp eq v2, v3 ; bin: 4c 39 d6 0f 94 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setne %bl - [-,%rbx] v302 = icmp ne v1, v2 ; bin: 48 39 f1 0f 95 c3 - ; asm: cmpq %r10, %rsi - ; asm: setne %dl - [-,%rdx] v303 = icmp ne v2, v3 ; bin: 4c 39 d6 0f 95 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setl %bl - [-,%rbx] v304 = icmp slt v1, v2 ; bin: 48 39 f1 0f 9c c3 - ; asm: cmpq %r10, %rsi - ; asm: setl %dl - [-,%rdx] v305 = icmp slt v2, v3 ; bin: 4c 39 d6 0f 9c c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setge %bl - [-,%rbx] v306 = icmp sge v1, v2 ; bin: 48 39 f1 0f 9d c3 - ; asm: cmpq %r10, %rsi - ; asm: setge %dl - [-,%rdx] v307 = icmp sge v2, v3 ; bin: 4c 39 d6 0f 9d c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setg %bl - [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 48 39 f1 0f 9f c3 - ; asm: cmpq %r10, %rsi - ; asm: setg %dl - [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 4c 39 d6 0f 9f c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setle %bl - [-,%rbx] v310 = icmp sle v1, v2 ; bin: 48 39 f1 0f 9e c3 - ; asm: cmpq %r10, %rsi - ; asm: setle %dl - [-,%rdx] v311 = icmp sle v2, v3 ; bin: 4c 39 d6 0f 9e c2 - - ; asm: cmpq %rsi, 
%rcx - ; asm: setb %bl - [-,%rbx] v312 = icmp ult v1, v2 ; bin: 48 39 f1 0f 92 c3 - ; asm: cmpq %r10, %rsi - ; asm: setb %dl - [-,%rdx] v313 = icmp ult v2, v3 ; bin: 4c 39 d6 0f 92 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setae %bl - [-,%rbx] v314 = icmp uge v1, v2 ; bin: 48 39 f1 0f 93 c3 - ; asm: cmpq %r10, %rsi - ; asm: setae %dl - [-,%rdx] v315 = icmp uge v2, v3 ; bin: 4c 39 d6 0f 93 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: seta %bl - [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 48 39 f1 0f 97 c3 - ; asm: cmpq %r10, %rsi - ; asm: seta %dl - [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 4c 39 d6 0f 97 c2 - - ; asm: cmpq %rsi, %rcx - ; asm: setbe %bl - [-,%rbx] v318 = icmp ule v1, v2 ; bin: 48 39 f1 0f 96 c3 - ; asm: cmpq %r10, %rsi - ; asm: setbe %dl - [-,%rdx] v319 = icmp ule v2, v3 ; bin: 4c 39 d6 0f 96 c2 - - ; asm: cmpq $37, %rcx - ; asm: setl %bl - [-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 48 83 f9 25 0f 9c c3 - - ; asm: cmpq $100000, %rcx - ; asm: setl %bl - [-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 48 81 f9 000186a0 0f 9c c3 - - ; Bool-to-int conversions. - - ; asm: movzbq %bl, %rcx - [-,%rcx] v350 = bint.i64 v300 ; bin: 0f b6 cb - ; asm: movzbq %dl, %rsi - [-,%rsi] v351 = bint.i64 v301 ; bin: 0f b6 f2 - - ; Colocated functions. - - ; asm: call bar - call fn1() ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000 - - ; asm: lea 0x0(%rip), %rcx - [-,%rcx] v400 = func_addr.i64 fn1 ; bin: 48 8d 0d PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %rsi - [-,%rsi] v401 = func_addr.i64 fn1 ; bin: 48 8d 35 PCRel4(%bar-4) 00000000 - ; asm: lea 0x0(%rip), %r10 - [-,%r10] v402 = func_addr.i64 fn1 ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000 - - ; asm: call *%rcx - call_indirect sig0, v400() ; bin: stk_ovf ff d1 - ; asm: call *%rsi - call_indirect sig0, v401() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v402() ; bin: stk_ovf 41 ff d2 - - ; Non-colocated functions. Note that there is no non-colocated non-PIC call. 
- - ; asm: movabsq $0, %rcx - [-,%rcx] v410 = func_addr.i64 fn0 ; bin: 48 b9 Abs8(%foo) 0000000000000000 - ; asm: movabsq $0, %rsi - [-,%rsi] v411 = func_addr.i64 fn0 ; bin: 48 be Abs8(%foo) 0000000000000000 - ; asm: movabsq $0, %r10 - [-,%r10] v412 = func_addr.i64 fn0 ; bin: 49 ba Abs8(%foo) 0000000000000000 - - ; asm: call *%rcx - call_indirect sig0, v410() ; bin: stk_ovf ff d1 - ; asm: call *%rsi - call_indirect sig0, v411() ; bin: stk_ovf ff d6 - ; asm: call *%r10 - call_indirect sig0, v412() ; bin: stk_ovf 41 ff d2 - - ; asm: movabsq $-1, %rcx - [-,%rcx] v450 = symbol_value.i64 gv0 ; bin: 48 b9 Abs8(%some_gv) 0000000000000000 - ; asm: movabsq $-1, %rsi - [-,%rsi] v451 = symbol_value.i64 gv0 ; bin: 48 be Abs8(%some_gv) 0000000000000000 - ; asm: movabsq $-1, %r10 - [-,%r10] v452 = symbol_value.i64 gv0 ; bin: 49 ba Abs8(%some_gv) 0000000000000000 - - ; Spill / Fill. - - ; asm: movq %rcx, 1032(%rsp) - [-,ss1] v500 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000408 - ; asm: movq %rsi, 1032(%rsp) - [-,ss1] v501 = spill v2 ; bin: stk_ovf 48 89 b4 24 00000408 - ; asm: movq %r10, 1032(%rsp) - [-,ss1] v502 = spill v3 ; bin: stk_ovf 4c 89 94 24 00000408 - - ; asm: movq 1032(%rsp), %rcx - [-,%rcx] v510 = fill v500 ; bin: 48 8b 8c 24 00000408 - ; asm: movq 1032(%rsp), %rsi - [-,%rsi] v511 = fill v501 ; bin: 48 8b b4 24 00000408 - ; asm: movq 1032(%rsp), %r10 - [-,%r10] v512 = fill v502 ; bin: 4c 8b 94 24 00000408 - - ; asm: movq %rcx, 1032(%rsp) - regspill v1, %rcx -> ss1 ; bin: stk_ovf 48 89 8c 24 00000408 - ; asm: movq 1032(%rsp), %rcx - regfill v1, ss1 -> %rcx ; bin: 48 8b 8c 24 00000408 - - ; Push and Pop - ; asm: pushq %rcx - x86_push v1 ; bin: stk_ovf 51 - ; asm: pushq %r10 - x86_push v3 ; bin: stk_ovf 41 52 - ; asm: popq %rcx - [-,%rcx] v513 = x86_pop.i64 ; bin: 59 - ; asm: popq %r10 - [-,%r10] v514 = x86_pop.i64 ; bin: 41 5a - - ; Adjust Stack Pointer Up - ; asm: addq $64, %rsp - adjust_sp_up_imm 64 ; bin: 48 83 c4 40 - ; asm: addq $-64, %rsp - adjust_sp_up_imm -64 ; 
bin: 48 83 c4 c0 - ; asm: addq $1024, %rsp - adjust_sp_up_imm 1024 ; bin: 48 81 c4 00000400 - ; asm: addq $-1024, %rsp - adjust_sp_up_imm -1024 ; bin: 48 81 c4 fffffc00 - ; asm: addq $2147483647, %rsp - adjust_sp_up_imm 2147483647 ; bin: 48 81 c4 7fffffff - ; asm: addq $-2147483648, %rsp - adjust_sp_up_imm -2147483648 ; bin: 48 81 c4 80000000 - - ; Adjust Stack Pointer Down - ; asm: subq %rcx, %rsp - adjust_sp_down v1 ; bin: 48 29 cc - ; asm: subq %r10, %rsp - adjust_sp_down v3 ; bin: 4c 29 d4 - ; asm: subq $64, %rsp - adjust_sp_down_imm 64 ; bin: 48 83 ec 40 - ; asm: subq $-64, %rsp - adjust_sp_down_imm -64 ; bin: 48 83 ec c0 - ; asm: subq $1024, %rsp - adjust_sp_down_imm 1024 ; bin: 48 81 ec 00000400 - ; asm: subq $-1024, %rsp - adjust_sp_down_imm -1024 ; bin: 48 81 ec fffffc00 - ; asm: subq $2147483647, %rsp - adjust_sp_down_imm 2147483647 ; bin: 48 81 ec 7fffffff - ; asm: subq $-2147483648, %rsp - adjust_sp_down_imm -2147483648 ; bin: 48 81 ec 80000000 - - ; Shift immediates - ; asm: shlq $12, %rsi - [-,%rsi] v515 = ishl_imm v2, 12 ; bin: 48 c1 e6 0c - ; asm: shlq $13, %r8 - [-,%r8] v516 = ishl_imm v4, 13 ; bin: 49 c1 e0 0d - ; asm: sarq $32, %rsi - [-,%rsi] v517 = sshr_imm v2, 32 ; bin: 48 c1 fe 20 - ; asm: sarq $33, %r8 - [-,%r8] v518 = sshr_imm v4, 33 ; bin: 49 c1 f8 21 - ; asm: shrq $62, %rsi - [-,%rsi] v519 = ushr_imm v2, 62 ; bin: 48 c1 ee 3e - ; asm: shrq $63, %r8 - [-,%r8] v520 = ushr_imm v4, 63 ; bin: 49 c1 e8 3f - - - ; Rotate immediates - ; asm: rolq $12, %rsi - [-,%rsi] v5101 = rotl_imm v2, 12 ; bin: 48 c1 c6 0c - ; asm: rolq $13, %r8 - [-,%r8] v5102 = rotl_imm v4, 13 ; bin: 49 c1 c0 0d - ; asm: rorq $32, %rsi - [-,%rsi] v5103 = rotr_imm v2, 32 ; bin: 48 c1 ce 20 - ; asm: rorq $33, %r8 - [-,%r8] v5104 = rotr_imm v4, 33 ; bin: 49 c1 c8 21 - - - ; Load Complex - [-,%rax] v521 = iconst.i64 1 - [-,%rbx] v522 = iconst.i64 1 - [-,%rdi] v523 = iconst.i32 1 - [-,%rsi] v524 = iconst.i32 1 - ; asm: movq (%rax,%rbx,1), %rcx - [-,%rcx] v525 = load_complex.i64 
v521+v522 ; bin: heap_oob 48 8b 0c 18 - ; asm: movl (%rax,%rbx,1), %ecx - [-,%rcx] v526 = load_complex.i32 v521+v522 ; bin: heap_oob 8b 0c 18 - ; asm: movq 1(%rax,%rbx,1), %rcx - [-,%rcx] v527 = load_complex.i64 v521+v522+1 ; bin: heap_oob 48 8b 4c 18 01 - ; asm: movl 1(%rax,%rbx,1), %ecx - [-,%rcx] v528 = load_complex.i32 v521+v522+1 ; bin: heap_oob 8b 4c 18 01 - ; asm: mov 0x100000(%rax,%rbx,1),%rcx - [-,%rcx] v529 = load_complex.i64 v521+v522+0x1000 ; bin: heap_oob 48 8b 8c 18 00001000 - ; asm: mov 0x100000(%rax,%rbx,1),%ecx - [-,%rcx] v530 = load_complex.i32 v521+v522+0x1000 ; bin: heap_oob 8b 8c 18 00001000 - ; asm: movzbq (%rax,%rbx,1),%rcx - [-,%rcx] v531 = uload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f b6 0c 18 - ; asm: movzbl (%rax,%rbx,1),%ecx - [-,%rcx] v532 = uload8_complex.i32 v521+v522 ; bin: heap_oob 0f b6 0c 18 - ; asm: movsbq (%rax,%rbx,1),%rcx - [-,%rcx] v533 = sload8_complex.i64 v521+v522 ; bin: heap_oob 48 0f be 0c 18 - ; asm: movsbl (%rax,%rbx,1),%ecx - [-,%rcx] v534 = sload8_complex.i32 v521+v522 ; bin: heap_oob 0f be 0c 18 - ; asm: movzwq (%rax,%rbx,1),%rcx - [-,%rcx] v535 = uload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f b7 0c 18 - ; asm: movzwl (%rax,%rbx,1),%ecx - [-,%rcx] v536 = uload16_complex.i32 v521+v522 ; bin: heap_oob 0f b7 0c 18 - ; asm: movswq (%rax,%rbx,1),%rcx - [-,%rcx] v537 = sload16_complex.i64 v521+v522 ; bin: heap_oob 48 0f bf 0c 18 - ; asm: movswl (%rax,%rbx,1),%ecx - [-,%rcx] v538 = sload16_complex.i32 v521+v522 ; bin: heap_oob 0f bf 0c 18 - ; asm: mov (%rax,%rbx,1),%ecx - [-,%rcx] v539 = uload32_complex v521+v522 ; bin: heap_oob 8b 0c 18 - ; asm: movslq (%rax,%rbx,1),%rcx - [-,%rcx] v540 = sload32_complex v521+v522 ; bin: heap_oob 48 63 0c 18 - [-,%r13] v550 = iconst.i64 1 - [-,%r14] v551 = iconst.i64 1 - ; asm: mov 0x0(%r13,%r14,1),%r12d - [-,%r12] v552 = load_complex.i32 v550+v551 ; bin: heap_oob 47 8b 64 35 00 - - ; Store Complex - [-,%rcx] v600 = iconst.i64 1 - [-,%rcx] v601 = iconst.i32 1 - [-,%r10] v602 = 
iconst.i64 1 - [-,%r11] v603 = iconst.i32 1 - ; asm: mov %rcx,(%rax,%rbx,1) - store_complex v600, v521+v522 ; bin: heap_oob 48 89 0c 18 - ; asm: mov %rcx,0x1(%rax,%rbx,1) - store_complex v600, v521+v522+1 ; bin: heap_oob 48 89 4c 18 01 - ; asm: mov %rcx,0x100000(%rax,%rbx,1) - store_complex v600, v521+v522+0x1000 ; bin: heap_oob 48 89 8c 18 00001000 - ; asm: mov %ecx,(%rax,%rbx,1) - store_complex v601, v521+v522 ; bin: heap_oob 89 0c 18 - ; asm: mov %ecx,0x1(%rax,%rbx,1) - store_complex v601, v521+v522+1 ; bin: heap_oob 89 4c 18 01 - ; asm: mov %ecx,0x100000(%rax,%rbx,1) - store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000 - ; asm: mov %ecx,(%rax,%rbx,1) - istore32_complex v600, v521+v522 ; bin: heap_oob 89 0c 18 - ; asm: mov %cx,(%rax,%rbx,1) - istore16_complex v600, v521+v522 ; bin: heap_oob 66 89 0c 18 - ; asm: mov %cx,(%rax,%rbx,1) - istore16_complex v601, v521+v522 ; bin: heap_oob 66 89 0c 18 - ; asm: mov %r10w,(%rax,%rbx,1) - istore16_complex v602, v521+v522 ; bin: heap_oob 66 44 89 14 18 - ; asm: mov %r11w,(%rax,%rbx,1) - istore16_complex v603, v521+v522 ; bin: heap_oob 66 44 89 1c 18 - ; asm: mov %cl,(%rax,%rbx,1) - istore8_complex v600, v521+v522 ; bin: heap_oob 88 0c 18 - ; asm: mov %cl,(%rax,%rbx,1) - istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 - - ; asm: testq %rcx, %rcx - ; asm: je block1 - brz v1, block1 ; bin: 48 85 c9 74 1b - fallthrough block3 - -block3: - ; asm: testq %rsi, %rsi - ; asm: je block1 - brz v2, block1 ; bin: 48 85 f6 74 16 - fallthrough block4 - -block4: - ; asm: testq %r10, %r10 - ; asm: je block1 - brz v3, block1 ; bin: 4d 85 d2 74 11 - fallthrough block5 - -block5: - ; asm: testq %rcx, %rcx - ; asm: jne block1 - brnz v1, block1 ; bin: 48 85 c9 75 0c - fallthrough block6 - -block6: - ; asm: testq %rsi, %rsi - ; asm: jne block1 - brnz v2, block1 ; bin: 48 85 f6 75 07 - fallthrough block7 - -block7: - ; asm: testq %r10, %r10 - ; asm: jne block1 - brnz v3, block1 ; bin: 4d 85 d2 75 02 - - ; asm: jmp 
block2 - jump block2 ; bin: eb 01 - - ; asm: block1: -block1: - return ; bin: c3 - - ; asm: block2: -block2: - ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. - ; asm: notq %rcx - [-,%rcx] v5000 = bnot v1 ; bin: 48 f7 d1 - jump block1 ; bin: eb fa -} - -; CPU flag instructions. -function %cpu_flags_I64() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%r10] v2 = iconst.i64 2 - jump block1 - -block1: - ; asm: cmpq %r10, %rcx - [-,%rflags] v10 = ifcmp v1, v2 ; bin: 4c 39 d1 - ; asm: cmpq %rcx, %r10 - [-,%rflags] v11 = ifcmp v2, v1 ; bin: 49 39 ca - - ; asm: je block1 - brif eq v11, block1 ; bin: 74 f8 - jump block2 - -block2: - ; asm: jne block1 - brif ne v11, block1 ; bin: 75 f6 - jump block3 - -block3: - ; asm: jl block1 - brif slt v11, block1 ; bin: 7c f4 - jump block4 - -block4: - ; asm: jge block1 - brif sge v11, block1 ; bin: 7d f2 - jump block5 - -block5: - ; asm: jg block1 - brif sgt v11, block1 ; bin: 7f f0 - jump block6 - -block6: - ; asm: jle block1 - brif sle v11, block1 ; bin: 7e ee - jump block7 - -block7: - ; asm: jb block1 - brif ult v11, block1 ; bin: 72 ec - jump block8 - -block8: - ; asm: jae block1 - brif uge v11, block1 ; bin: 73 ea - jump block9 - -block9: - ; asm: ja block1 - brif ugt v11, block1 ; bin: 77 e8 - jump block10 - -block10: - ; asm: jbe block1 - brif ule v11, block1 ; bin: 76 e6 - jump block11 - -block11: - - ; asm: sete %bl - [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 - ; asm: setne %bl - [-,%rbx] v21 = trueif ne v11 ; bin: 0f 95 c3 - ; asm: setl %dl - [-,%rdx] v22 = trueif slt v11 ; bin: 0f 9c c2 - ; asm: setge %dl - [-,%rdx] v23 = trueif sge v11 ; bin: 0f 9d c2 - ; asm: setg %r10b - [-,%r10] v24 = trueif sgt v11 ; bin: 41 0f 9f c2 - ; asm: setle %r10b - [-,%r10] v25 = trueif sle v11 ; bin: 41 0f 9e c2 - ; asm: setb %r14b - [-,%r14] v26 = trueif ult v11 ; bin: 41 0f 92 c6 - ; asm: setae %r14b - [-,%r14] v27 = trueif uge v11 ; bin: 41 0f 93 c6 - ; asm: seta %r11b - [-,%r11] v28 = trueif ugt v11 ; 
bin: 41 0f 97 c3 - ; asm: setbe %r11b - [-,%r11] v29 = trueif ule v11 ; bin: 41 0f 96 c3 - - ; The trapif instructions are encoded as macros: a conditional jump over a ud2. - ; asm: jne .+4; ud2 - trapif eq v11, user0 ; bin: 75 02 user0 0f 0b - ; asm: je .+4; ud2 - trapif ne v11, user0 ; bin: 74 02 user0 0f 0b - ; asm: jnl .+4; ud2 - trapif slt v11, user0 ; bin: 7d 02 user0 0f 0b - ; asm: jnge .+4; ud2 - trapif sge v11, user0 ; bin: 7c 02 user0 0f 0b - ; asm: jng .+4; ud2 - trapif sgt v11, user0 ; bin: 7e 02 user0 0f 0b - ; asm: jnle .+4; ud2 - trapif sle v11, user0 ; bin: 7f 02 user0 0f 0b - ; asm: jnb .+4; ud2 - trapif ult v11, user0 ; bin: 73 02 user0 0f 0b - ; asm: jnae .+4; ud2 - trapif uge v11, user0 ; bin: 72 02 user0 0f 0b - ; asm: jna .+4; ud2 - trapif ugt v11, user0 ; bin: 76 02 user0 0f 0b - ; asm: jnbe .+4; ud2 - trapif ule v11, user0 ; bin: 77 02 user0 0f 0b - ; asm: jo .+4; ud2 - trapif of v11, user0 ; bin: 71 02 user0 0f 0b - ; asm: jno .+4; ud2 - trapif nof v11, user0 ; bin: 70 02 user0 0f 0b - - ; Debug trap. - debugtrap ; bin: cc - - ; Stack check. - ; asm: cmpq %rsp, %rcx - [-,%rflags] v40 = ifcmp_sp v1 ; bin: 48 39 e1 - ; asm: cmpq %rsp, %r10 - [-,%rflags] v41 = ifcmp_sp v2 ; bin: 49 39 e2 - - ; asm: cmpq $-100, %rcx - [-,%rflags] v522 = ifcmp_imm v1, -100 ; bin: 48 83 f9 9c - ; asm: cmpq $100, %r10 - [-,%rflags] v523 = ifcmp_imm v2, 100 ; bin: 49 83 fa 64 - - ; asm: cmpq $-10000, %rcx - [-,%rflags] v524 = ifcmp_imm v1, -10000 ; bin: 48 81 f9 ffffd8f0 - ; asm: cmpq $10000, %r10 - [-,%rflags] v525 = ifcmp_imm v2, 10000 ; bin: 49 81 fa 00002710 - - - return -} - -; Test for the encoding of outgoing_arg stack slots. 
-function %outargs() { - ss0 = incoming_arg 16, offset -16 - ss1 = outgoing_arg 8, offset 8 - ss2 = outgoing_arg 8, offset 0 - -block0: - [-,%rcx] v1 = iconst.i64 1 - - ; asm: movq %rcx, 8(%rsp) - [-,ss1] v10 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000008 - ; asm: movq %rcx, (%rsp) - [-,ss2] v11 = spill v1 ; bin: stk_ovf 48 89 8c 24 00000000 - - return -} - -; Tests for i32 instructions in 64-bit mode. -; -; Note that many i32 instructions can be encoded both with and without a REX -; prefix if they only use the low 8 registers. Here, we are testing the REX -; encodings which are chosen by default. Switching to non-REX encodings should -; be done by an instruction shrinking pass. -function %I32() { - sig0 = () - fn0 = %foo() - - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - -block0: - - ; Integer Constants. - - ; asm: movl $0x01020304, %ecx - [-,%rcx] v1 = iconst.i32 0x0102_0304 ; bin: b9 01020304 - ; asm: movl $0x11020304, %esi - [-,%rsi] v2 = iconst.i32 0x1102_0304 ; bin: be 11020304 - ; asm: movl $0x21020304, %r10d - [-,%r10] v3 = iconst.i32 0x2102_0304 ; bin: 41 ba 21020304 - ; asm: movl $0xff001122, %r8d - [-,%r8] v4 = iconst.i32 0xff00_1122 ; bin: 41 b8 ff001122 - ; asm: movl $0x88001122, %r14d - [-,%r14] v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122 - - ; Load/Store instructions. - - ; Register indirect addressing with no displacement. 
- - ; asm: movl (%rcx), %edi - [-,%rdi] v10 = load.i32 v1 ; bin: heap_oob 8b 39 - ; asm: movl (%rsi), %edx - [-,%rdx] v11 = load.i32 v2 ; bin: heap_oob 8b 16 - ; asm: movzwl (%rcx), %edi - [-,%rdi] v12 = uload16.i32 v1 ; bin: heap_oob 0f b7 39 - ; asm: movzwl (%rsi), %edx - [-,%rdx] v13 = uload16.i32 v2 ; bin: heap_oob 0f b7 16 - ; asm: movswl (%rcx), %edi - [-,%rdi] v14 = sload16.i32 v1 ; bin: heap_oob 0f bf 39 - ; asm: movswl (%rsi), %edx - [-,%rdx] v15 = sload16.i32 v2 ; bin: heap_oob 0f bf 16 - ; asm: movzbl (%rcx), %edi - [-,%rdi] v16 = uload8.i32 v1 ; bin: heap_oob 0f b6 39 - ; asm: movzbl (%rsi), %edx - [-,%rdx] v17 = uload8.i32 v2 ; bin: heap_oob 0f b6 16 - ; asm: movsbl (%rcx), %edi - [-,%rdi] v18 = sload8.i32 v1 ; bin: heap_oob 0f be 39 - ; asm: movsbl (%rsi), %edx - [-,%rdx] v19 = sload8.i32 v2 ; bin: heap_oob 0f be 16 - - ; Register-indirect with 8-bit signed displacement. - - ; asm: movl 50(%rcx), %edi - [-,%rdi] v20 = load.i32 v1+50 ; bin: heap_oob 8b 79 32 - ; asm: movl -50(%rsi), %edx - [-,%rdx] v21 = load.i32 v2-50 ; bin: heap_oob 8b 56 ce - ; asm: movzwl 50(%rcx), %edi - [-,%rdi] v22 = uload16.i32 v1+50 ; bin: heap_oob 0f b7 79 32 - ; asm: movzwl -50(%rsi), %edx - [-,%rdx] v23 = uload16.i32 v2-50 ; bin: heap_oob 0f b7 56 ce - ; asm: movswl 50(%rcx), %edi - [-,%rdi] v24 = sload16.i32 v1+50 ; bin: heap_oob 0f bf 79 32 - ; asm: movswl -50(%rsi), %edx - [-,%rdx] v25 = sload16.i32 v2-50 ; bin: heap_oob 0f bf 56 ce - ; asm: movzbl 50(%rcx), %edi - [-,%rdi] v26 = uload8.i32 v1+50 ; bin: heap_oob 0f b6 79 32 - ; asm: movzbl -50(%rsi), %edx - [-,%rdx] v27 = uload8.i32 v2-50 ; bin: heap_oob 0f b6 56 ce - ; asm: movsbl 50(%rcx), %edi - [-,%rdi] v28 = sload8.i32 v1+50 ; bin: heap_oob 0f be 79 32 - ; asm: movsbl -50(%rsi), %edx - [-,%rdx] v29 = sload8.i32 v2-50 ; bin: heap_oob 0f be 56 ce - - ; Register-indirect with 32-bit signed displacement. 
- - ; asm: movl 50000(%rcx), %edi - [-,%rdi] v30 = load.i32 v1+50000 ; bin: heap_oob 8b b9 0000c350 - ; asm: movl -50000(%rsi), %edx - [-,%rdx] v31 = load.i32 v2-50000 ; bin: heap_oob 8b 96 ffff3cb0 - ; asm: movzwl 50000(%rcx), %edi - [-,%rdi] v32 = uload16.i32 v1+50000 ; bin: heap_oob 0f b7 b9 0000c350 - ; asm: movzwl -50000(%rsi), %edx - [-,%rdx] v33 = uload16.i32 v2-50000 ; bin: heap_oob 0f b7 96 ffff3cb0 - ; asm: movswl 50000(%rcx), %edi - [-,%rdi] v34 = sload16.i32 v1+50000 ; bin: heap_oob 0f bf b9 0000c350 - ; asm: movswl -50000(%rsi), %edx - [-,%rdx] v35 = sload16.i32 v2-50000 ; bin: heap_oob 0f bf 96 ffff3cb0 - ; asm: movzbl 50000(%rcx), %edi - [-,%rdi] v36 = uload8.i32 v1+50000 ; bin: heap_oob 0f b6 b9 0000c350 - ; asm: movzbl -50000(%rsi), %edx - [-,%rdx] v37 = uload8.i32 v2-50000 ; bin: heap_oob 0f b6 96 ffff3cb0 - ; asm: movsbl 50000(%rcx), %edi - [-,%rdi] v38 = sload8.i32 v1+50000 ; bin: heap_oob 0f be b9 0000c350 - ; asm: movsbl -50000(%rsi), %edx - [-,%rdx] v39 = sload8.i32 v2-50000 ; bin: heap_oob 0f be 96 ffff3cb0 - - ; Integer Register Operations. - - ; asm: notl %ecx - [-,%rcx] v4000 = bnot v1 ; bin: f7 d1 - ; asm: notl %esi - [-,%rsi] v4001 = bnot v2 ; bin: f7 d6 - ; asm: notl %r10d - [-,%r10] v4002 = bnot v3 ; bin: 41 f7 d2 - - ; Integer Register-Register Operations. 
- - ; asm: addl %esi, %ecx - [-,%rcx] v40 = iadd v1, v2 ; bin: 01 f1 - ; asm: addl %r10d, %esi - [-,%rsi] v41 = iadd v2, v3 ; bin: 44 01 d6 - ; asm: addl %ecx, %r10d - [-,%r10] v42 = iadd v3, v1 ; bin: 41 01 ca - - ; asm: subl %esi, %ecx - [-,%rcx] v50 = isub v1, v2 ; bin: 29 f1 - ; asm: subl %r10d, %esi - [-,%rsi] v51 = isub v2, v3 ; bin: 44 29 d6 - ; asm: subl %ecx, %r10d - [-,%r10] v52 = isub v3, v1 ; bin: 41 29 ca - - ; asm: andl %esi, %ecx - [-,%rcx] v60 = band v1, v2 ; bin: 21 f1 - ; asm: andl %r10d, %esi - [-,%rsi] v61 = band v2, v3 ; bin: 44 21 d6 - ; asm: andl %ecx, %r10d - [-,%r10] v62 = band v3, v1 ; bin: 41 21 ca - - ; asm: orl %esi, %ecx - [-,%rcx] v70 = bor v1, v2 ; bin: 09 f1 - ; asm: orl %r10d, %esi - [-,%rsi] v71 = bor v2, v3 ; bin: 44 09 d6 - ; asm: orl %ecx, %r10d - [-,%r10] v72 = bor v3, v1 ; bin: 41 09 ca - - ; asm: xorl %esi, %ecx - [-,%rcx] v80 = bxor v1, v2 ; bin: 31 f1 - ; asm: xorl %r10d, %esi - [-,%rsi] v81 = bxor v2, v3 ; bin: 44 31 d6 - ; asm: xorl %ecx, %r10d - [-,%r10] v82 = bxor v3, v1 ; bin: 41 31 ca - - ; asm: shll %cl, %esi - [-,%rsi] v90 = ishl v2, v1 ; bin: d3 e6 - ; asm: shll %cl, %r10d - [-,%r10] v91 = ishl v3, v1 ; bin: 41 d3 e2 - ; asm: sarl %cl, %esi - [-,%rsi] v92 = sshr v2, v1 ; bin: d3 fe - ; asm: sarl %cl, %r10d - [-,%r10] v93 = sshr v3, v1 ; bin: 41 d3 fa - ; asm: shrl %cl, %esi - [-,%rsi] v94 = ushr v2, v1 ; bin: d3 ee - ; asm: shrl %cl, %r10d - [-,%r10] v95 = ushr v3, v1 ; bin: 41 d3 ea - - ; asm: roll %cl, %esi - [-,%rsi] v96 = rotl v2, v1 ; bin: d3 c6 - ; asm: roll %cl, %r10d - [-,%r10] v97 = rotl v3, v1 ; bin: 41 d3 c2 - ; asm: rorl %cl, %esi - [-,%rsi] v98 = rotr v2, v1 ; bin: d3 ce - ; asm: rorl %cl, %r10d - [-,%r10] v99 = rotr v3, v1 ; bin: 41 d3 ca - - ; Integer Register-Immediate Operations. - ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. - ; Some take 8-bit immediates that are sign-extended to 64 bits. 
- - ; asm: addl $-100000, %ecx - [-,%rcx] v100 = iadd_imm v1, -100000 ; bin: 81 c1 fffe7960 - ; asm: addl $100000, %esi - [-,%rsi] v101 = iadd_imm v2, 100000 ; bin: 81 c6 000186a0 - ; asm: addl $0x7fffffff, %r10d - [-,%r10] v102 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff - ; asm: addl $100, %r8d - [-,%r8] v103 = iadd_imm v4, 100 ; bin: 41 83 c0 64 - ; asm: addl $-100, %r14d - [-,%r14] v104 = iadd_imm v5, -100 ; bin: 41 83 c6 9c - - ; asm: andl $-100000, %ecx - [-,%rcx] v110 = band_imm v1, -100000 ; bin: 81 e1 fffe7960 - ; asm: andl $100000, %esi - [-,%rsi] v111 = band_imm v2, 100000 ; bin: 81 e6 000186a0 - ; asm: andl $0x7fffffff, %r10d - [-,%r10] v112 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff - ; asm: andl $100, %r8d - [-,%r8] v113 = band_imm v4, 100 ; bin: 41 83 e0 64 - ; asm: andl $-100, %r14d - [-,%r14] v114 = band_imm v5, -100 ; bin: 41 83 e6 9c - - ; asm: orl $-100000, %ecx - [-,%rcx] v120 = bor_imm v1, -100000 ; bin: 81 c9 fffe7960 - ; asm: orl $100000, %esi - [-,%rsi] v121 = bor_imm v2, 100000 ; bin: 81 ce 000186a0 - ; asm: orl $0x7fffffff, %r10d - [-,%r10] v122 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff - ; asm: orl $100, %r8d - [-,%r8] v123 = bor_imm v4, 100 ; bin: 41 83 c8 64 - ; asm: orl $-100, %r14d - [-,%r14] v124 = bor_imm v5, -100 ; bin: 41 83 ce 9c - ; asm: ret - - ; asm: xorl $-100000, %ecx - [-,%rcx] v130 = bxor_imm v1, -100000 ; bin: 81 f1 fffe7960 - ; asm: xorl $100000, %esi - [-,%rsi] v131 = bxor_imm v2, 100000 ; bin: 81 f6 000186a0 - ; asm: xorl $0x7fffffff, %r10d - [-,%r10] v132 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff - ; asm: xorl $100, %r8d - [-,%r8] v133 = bxor_imm v4, 100 ; bin: 41 83 f0 64 - ; asm: xorl $-100, %r14d - [-,%r14] v134 = bxor_imm v5, -100 ; bin: 41 83 f6 9c - - ; Register copies. 
- - ; asm: movl %esi, %ecx - [-,%rcx] v140 = copy v2 ; bin: 89 f1 - ; asm: movl %r10d, %esi - [-,%rsi] v141 = copy v3 ; bin: 44 89 d6 - ; asm: movl %ecx, %r10d - [-,%r10] v142 = copy v1 ; bin: 41 89 ca - - ; More arithmetic. - - ; asm: imull %esi, %ecx - [-,%rcx] v150 = imul v1, v2 ; bin: 0f af ce - ; asm: imull %r10d, %esi - [-,%rsi] v151 = imul v2, v3 ; bin: 41 0f af f2 - ; asm: imull %ecx, %r10d - [-,%r10] v152 = imul v3, v1 ; bin: 44 0f af d1 - - [-,%rax] v160 = iconst.i32 1 - [-,%rdx] v161 = iconst.i32 2 - ; asm: idivl %ecx - [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1 ; bin: int_divz f7 f9 - ; asm: idivl %esi - [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2 ; bin: int_divz f7 fe - ; asm: idivl %r10d - [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3 ; bin: int_divz 41 f7 fa - ; asm: divl %ecx - [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1 ; bin: int_divz f7 f1 - ; asm: divl %esi - [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2 ; bin: int_divz f7 f6 - ; asm: divl %r10d - [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3 ; bin: int_divz 41 f7 f2 - - ; Bit-counting instructions. - - ; asm: popcntl %esi, %ecx - [-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce - ; asm: popcntl %r10d, %esi - [-,%rsi] v201 = popcnt v3 ; bin: f3 41 0f b8 f2 - ; asm: popcntl %ecx, %r10d - [-,%r10] v202 = popcnt v1 ; bin: f3 44 0f b8 d1 - - ; asm: lzcntl %esi, %ecx - [-,%rcx] v203 = clz v2 ; bin: f3 0f bd ce - ; asm: lzcntl %r10d, %esi - [-,%rsi] v204 = clz v3 ; bin: f3 41 0f bd f2 - ; asm: lzcntl %ecx, %r10d - [-,%r10] v205 = clz v1 ; bin: f3 44 0f bd d1 - - ; asm: tzcntl %esi, %ecx - [-,%rcx] v206 = ctz v2 ; bin: f3 0f bc ce - ; asm: tzcntl %r10d, %esi - [-,%rsi] v207 = ctz v3 ; bin: f3 41 0f bc f2 - ; asm: tzcntl %ecx, %r10d - [-,%r10] v208 = ctz v1 ; bin: f3 44 0f bc d1 - - ; Integer comparisons. 
- - ; asm: cmpl %esi, %ecx - ; asm: sete %bl - [-,%rbx] v300 = icmp eq v1, v2 ; bin: 39 f1 0f 94 c3 - ; asm: cmpl %r10d, %esi - ; asm: sete %dl - [-,%rdx] v301 = icmp eq v2, v3 ; bin: 44 39 d6 0f 94 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setne %bl - [-,%rbx] v302 = icmp ne v1, v2 ; bin: 39 f1 0f 95 c3 - ; asm: cmpl %r10d, %esi - ; asm: setne %dl - [-,%rdx] v303 = icmp ne v2, v3 ; bin: 44 39 d6 0f 95 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setl %bl - [-,%rbx] v304 = icmp slt v1, v2 ; bin: 39 f1 0f 9c c3 - ; asm: cmpl %r10d, %esi - ; asm: setl %dl - [-,%rdx] v305 = icmp slt v2, v3 ; bin: 44 39 d6 0f 9c c2 - - ; asm: cmpl %esi, %ecx - ; asm: setge %bl - [-,%rbx] v306 = icmp sge v1, v2 ; bin: 39 f1 0f 9d c3 - ; asm: cmpl %r10d, %esi - ; asm: setge %dl - [-,%rdx] v307 = icmp sge v2, v3 ; bin: 44 39 d6 0f 9d c2 - - ; asm: cmpl %esi, %ecx - ; asm: setg %bl - [-,%rbx] v308 = icmp sgt v1, v2 ; bin: 39 f1 0f 9f c3 - ; asm: cmpl %r10d, %esi - ; asm: setg %dl - [-,%rdx] v309 = icmp sgt v2, v3 ; bin: 44 39 d6 0f 9f c2 - - ; asm: cmpl %esi, %ecx - ; asm: setle %bl - [-,%rbx] v310 = icmp sle v1, v2 ; bin: 39 f1 0f 9e c3 - ; asm: cmpl %r10d, %esi - ; asm: setle %dl - [-,%rdx] v311 = icmp sle v2, v3 ; bin: 44 39 d6 0f 9e c2 - - ; asm: cmpl %esi, %ecx - ; asm: setb %bl - [-,%rbx] v312 = icmp ult v1, v2 ; bin: 39 f1 0f 92 c3 - ; asm: cmpl %r10d, %esi - ; asm: setb %dl - [-,%rdx] v313 = icmp ult v2, v3 ; bin: 44 39 d6 0f 92 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setae %bl - [-,%rbx] v314 = icmp uge v1, v2 ; bin: 39 f1 0f 93 c3 - ; asm: cmpl %r10d, %esi - ; asm: setae %dl - [-,%rdx] v315 = icmp uge v2, v3 ; bin: 44 39 d6 0f 93 c2 - - ; asm: cmpl %esi, %ecx - ; asm: seta %bl - [-,%rbx] v316 = icmp ugt v1, v2 ; bin: 39 f1 0f 97 c3 - ; asm: cmpl %r10d, %esi - ; asm: seta %dl - [-,%rdx] v317 = icmp ugt v2, v3 ; bin: 44 39 d6 0f 97 c2 - - ; asm: cmpl %esi, %ecx - ; asm: setbe %bl - [-,%rbx] v318 = icmp ule v1, v2 ; bin: 39 f1 0f 96 c3 - ; asm: cmpl %r10d, %esi - ; asm: setbe %dl - [-,%rdx] 
v319 = icmp ule v2, v3 ; bin: 44 39 d6 0f 96 c2 - - ; asm: cmpl $37, %ecx - ; asm: setl %bl - [-,%rbx] v320 = icmp_imm slt v1, 37 ; bin: 83 f9 25 0f 9c c3 - - ; asm: cmpl $100000, %ecx - ; asm: setl %bl - [-,%rbx] v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3 - - ; Bool-to-int conversions. - - ; asm: movzbl %bl, %ecx - [-,%rcx] v350 = bint.i32 v300 ; bin: 0f b6 cb - ; asm: movzbl %dl, %esi - [-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2 - - ; Spill / Fill. - - ; asm: movl %ecx, 1032(%rsp) - [-,ss1] v500 = spill v1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl %esi, 1032(%rsp) - [-,ss1] v501 = spill v2 ; bin: stk_ovf 89 b4 24 00000408 - ; asm: movl %r10d, 1032(%rsp) - [-,ss1] v502 = spill v3 ; bin: stk_ovf 44 89 94 24 00000408 - - ; asm: movl 1032(%rsp), %ecx - [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408 - ; asm: movl 1032(%rsp), %esi - [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 - ; asm: movl 1032(%rsp), %r10d - [-,%r10] v512 = fill v502 ; bin: 44 8b 94 24 00000408 - - ; asm: movl %ecx, 1032(%rsp) - regspill v1, %rcx -> ss1 ; bin: stk_ovf 89 8c 24 00000408 - ; asm: movl 1032(%rsp), %ecx - regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408 - - ; asm: cmpl %esi, %ecx - [-,%rflags] v520 = ifcmp v1, v2 ; bin: 39 f1 - ; asm: cmpl %r10d, %esi - [-,%rflags] v521 = ifcmp v2, v3 ; bin: 44 39 d6 - - ; asm: cmpl $-100, %ecx - [-,%rflags] v522 = ifcmp_imm v1, -100 ; bin: 83 f9 9c - ; asm: cmpl $100, %r10d - [-,%rflags] v523 = ifcmp_imm v3, 100 ; bin: 41 83 fa 64 - - ; asm: cmpl $-10000, %ecx - [-,%rflags] v524 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0 - ; asm: cmpl $10000, %r10d - [-,%rflags] v525 = ifcmp_imm v3, 10000 ; bin: 41 81 fa 00002710 - - ; asm: shll $2, %esi - [-,%rsi] v526 = ishl_imm v2, 2 ; bin: c1 e6 02 - ; asm: shll $12, %r10d - [-,%r10] v527 = ishl_imm v3, 12 ; bin: 41 c1 e2 0c - ; asm: sarl $5, %esi - [-,%rsi] v529 = sshr_imm v2, 5 ; bin: c1 fe 05 - ; asm: sarl $32, %r10d - [-,%r10] v530 = sshr_imm v3, 32 ; bin: 41 c1 fa 20 - ; 
asm: shrl $8, %esi - [-,%rsi] v532 = ushr_imm v2, 8 ; bin: c1 ee 08 - ; asm: shrl $31, %r10d - [-,%r10] v533 = ushr_imm v3, 31 ; bin: 41 c1 ea 1f - - ; asm: testl %ecx, %ecx - ; asm: je block1x - brz v1, block1 ; bin: 85 c9 74 18 - fallthrough block3 - -block3: - ; asm: testl %esi, %esi - ; asm: je block1x - brz v2, block1 ; bin: 85 f6 74 14 - fallthrough block4 - -block4: - ; asm: testl %r10d, %r10d - ; asm: je block1x - brz v3, block1 ; bin: 45 85 d2 74 0f - fallthrough block5 - -block5: - ; asm: testl %ecx, %ecx - ; asm: jne block1x - brnz v1, block1 ; bin: 85 c9 75 0b - fallthrough block6 - -block6: - ; asm: testl %esi, %esi - ; asm: jne block1x - brnz v2, block1 ; bin: 85 f6 75 07 - fallthrough block7 - -block7: - ; asm: testl %r10d, %r10d - ; asm: jne block1x - brnz v3, block1 ; bin: 45 85 d2 75 02 - - ; asm: jmp block2x - jump block2 ; bin: eb 01 - - ; asm: block1x: -block1: - return ; bin: c3 - - ; asm: block2x: -block2: - ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. - ; asm: notl %ecx - [-,%rcx] v5000 = bnot v1 ; bin: f7 d1 - jump block1 ; bin: eb fb - -} - -; Tests for i32/i8 conversion instructions. -function %I32_I8() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - [-,%r10] v3 = iconst.i32 3 - - [-,%rcx] v11 = ireduce.i8 v1 ; bin: - [-,%rsi] v12 = ireduce.i8 v2 ; bin: - [-,%r10] v13 = ireduce.i8 v3 ; bin: - - ; asm: movsbl %cl, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f be f1 - ; asm: movsbl %sil, %r10d - [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f be d6 - ; asm: movsbl %r10b, %ecx - [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f be ca - - ; asm: movzbl %cl, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b6 f1 - ; asm: movzbl %sil, %r10d - [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b6 d6 - ; asm: movzbl %r10b, %ecx - [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b6 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i32/i16 conversion instructions. 
-function %I32_I16() { -block0: - [-,%rcx] v1 = iconst.i32 1 - [-,%rsi] v2 = iconst.i32 2 - [-,%r10] v3 = iconst.i32 3 - - [-,%rcx] v11 = ireduce.i16 v1 ; bin: - [-,%rsi] v12 = ireduce.i16 v2 ; bin: - [-,%r10] v13 = ireduce.i16 v3 ; bin: - - ; asm: movswl %cx, %esi - [-,%rsi] v20 = sextend.i32 v11 ; bin: 0f bf f1 - ; asm: movswl %si, %r10d - [-,%r10] v21 = sextend.i32 v12 ; bin: 44 0f bf d6 - ; asm: movswl %r10w, %ecx - [-,%rcx] v22 = sextend.i32 v13 ; bin: 41 0f bf ca - - ; asm: movzwl %cx, %esi - [-,%rsi] v30 = uextend.i32 v11 ; bin: 0f b7 f1 - ; asm: movzwl %si, %r10d - [-,%r10] v31 = uextend.i32 v12 ; bin: 44 0f b7 d6 - ; asm: movzwl %r10w, %ecx - [-,%rcx] v32 = uextend.i32 v13 ; bin: 41 0f b7 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64/i8 conversion instructions. -function %I64_I8() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%rsi] v2 = iconst.i64 2 - [-,%r10] v3 = iconst.i64 3 - - [-,%rcx] v11 = ireduce.i8 v1 ; bin: - [-,%rsi] v12 = ireduce.i8 v2 ; bin: - [-,%r10] v13 = ireduce.i8 v3 ; bin: - - ; asm: movsbq %cl, %rsi - [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f be f1 - ; asm: movsbq %sil, %r10 - [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f be d6 - ; asm: movsbq %r10b, %rcx - [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f be ca - - ; asm: movzbl %cl, %esi - [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b6 f1 - ; asm: movzbl %sil, %r10d - [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b6 d6 - ; asm: movzbl %r10b, %ecx - [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b6 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64/i16 conversion instructions. 
-function %I64_I16() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%rsi] v2 = iconst.i64 2 - [-,%r10] v3 = iconst.i64 3 - - [-,%rcx] v11 = ireduce.i16 v1 ; bin: - [-,%rsi] v12 = ireduce.i16 v2 ; bin: - [-,%r10] v13 = ireduce.i16 v3 ; bin: - - ; asm: movswq %cx, %rsi - [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 0f bf f1 - ; asm: movswq %si, %r10 - [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 0f bf d6 - ; asm: movswq %r10w, %rcx - [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 0f bf ca - - ; asm: movzwl %cx, %esi - [-,%rsi] v30 = uextend.i64 v11 ; bin: 0f b7 f1 - ; asm: movzwl %si, %r10d - [-,%r10] v31 = uextend.i64 v12 ; bin: 44 0f b7 d6 - ; asm: movzwl %r10w, %ecx - [-,%rcx] v32 = uextend.i64 v13 ; bin: 41 0f b7 ca - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64/i32 conversion instructions. -function %I64_I32() { -block0: - [-,%rcx] v1 = iconst.i64 1 - [-,%rsi] v2 = iconst.i64 2 - [-,%r10] v3 = iconst.i64 3 - - [-,%rcx] v11 = ireduce.i32 v1 ; bin: - [-,%rsi] v12 = ireduce.i32 v2 ; bin: - [-,%r10] v13 = ireduce.i32 v3 ; bin: - - ; asm: movslq %ecx, %rsi - [-,%rsi] v20 = sextend.i64 v11 ; bin: 48 63 f1 - ; asm: movslq %esi, %r10 - [-,%r10] v21 = sextend.i64 v12 ; bin: 4c 63 d6 - ; asm: movslq %r10d, %rcx - [-,%rcx] v22 = sextend.i64 v13 ; bin: 49 63 ca - - ; asm: movl %ecx, %esi - [-,%rsi] v30 = uextend.i64 v11 ; bin: 89 ce - ; asm: movl %esi, %r10d - [-,%r10] v31 = uextend.i64 v12 ; bin: 41 89 f2 - ; asm: movl %r10d, %ecx - [-,%rcx] v32 = uextend.i64 v13 ; bin: 44 89 d1 - - trap user0 ; bin: user0 0f 0b -} - -; Tests for i64 jump table instructions. -function %I64_JT(i64 [%rdi]) { - jt0 = jump_table [block1, block2, block3] - -block0(v0: i64 [%rdi]): - ; Note: The next two lines will need to change whenever instructions are - ; added or removed from this test. 
- [-, %rax] v1 = jump_table_base.i64 jt0 ; bin: 48 8d 05 00000039 PCRelRodata4(jt0) - [-, %r10] v2 = jump_table_base.i64 jt0 ; bin: 4c 8d 15 00000032 PCRelRodata4(jt0) - - [-, %rbx] v10 = iconst.i64 1 - [-, %r13] v11 = iconst.i64 2 - - [-, %rax] v20 = jump_table_entry.i64 v10, v1, 4, jt0 ; bin: 48 63 04 98 - [-, %rax] v21 = jump_table_entry.i64 v10, v2, 4, jt0 ; bin: 49 63 04 9a - [-, %rax] v22 = jump_table_entry.i64 v11, v1, 4, jt0 ; bin: 4a 63 04 a8 - [-, %rax] v23 = jump_table_entry.i64 v11, v2, 4, jt0 ; bin: 4b 63 04 aa - - [-, %r10] v30 = jump_table_entry.i64 v10, v1, 4, jt0 ; bin: 4c 63 14 98 - [-, %r10] v31 = jump_table_entry.i64 v10, v2, 4, jt0 ; bin: 4d 63 14 9a - [-, %r10] v32 = jump_table_entry.i64 v11, v1, 4, jt0 ; bin: 4e 63 14 a8 - [-, %r10] v33 = jump_table_entry.i64 v11, v2, 4, jt0 ; bin: 4f 63 14 aa - - fallthrough block10 - -block10: - indirect_jump_table_br v10, jt0 ; bin: ff e3 -block11: - indirect_jump_table_br v11, jt0 ; bin: 41 ff e5 - -block1: - fallthrough block2 -block2: - fallthrough block3 -block3: - trap user0 -} - -function %r12_r13_loads() { -block0: - [-,%r12] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%r13] v2 = iconst.i64 0xfedc_ba98_7654_3210 - [-,%rax] v3 = iconst.i64 0x1 - - ;; Simple GPR load. - ; asm: movq (%r12), %rdx - [-,%rdx] v4 = load.i64 notrap v1 ; bin: 49 8b 14 24 - ; asm: movq (%r13), %rdx - [-,%rdx] v5 = load.i64 notrap v2 ; bin: 49 8b 55 00 - - ;; Load with disp8. - ; asm: movq 0x1(%r12), %rdx - [-,%rdx] v6 = load.i64 notrap v1+1 ; bin: 49 8b 54 24 01 - ; asm: movq 0x1(%r13), %rdx - [-,%rdx] v7 = load.i64 notrap v2+1 ; bin: 49 8b 55 01 - - ;; Load with disp32. - ; asm: movq 0x100(%r12), %rdx - [-,%rdx] v8 = load.i64 notrap v1+256 ; bin: 49 8b 94 24 00000100 - ; asm: movq 0x100(%r13), %rdx - [-,%rdx] v9 = load.i64 notrap v2+256 ; bin: 49 8b 95 00000100 - - ;; Load for base+index. 
- ; asm: movq (%r12, %rax, 1), %rdx - [-,%rdx] v10 = load_complex.i64 notrap v1+v3 ; bin: 49 8b 14 04 - ; asm: movq (%r13, %rax, 1), %rdx - [-,%rdx] v11 = load_complex.i64 notrap v2+v3 ; bin: 49 8b 54 05 00 - - ;; Now for FP values. - ; asm: movss (%r12), %xmm0 - [-,%xmm0] v12 = load.f32 notrap v1 ; bin: f3 41 0f 10 04 24 - ; asm: movss (%r13), %xmm0 - [-,%xmm0] v13 = load.f32 notrap v2 ; bin: f3 41 0f 10 45 00 - - ;; Load with disp8. - ; asm: movss 0x1(%r12), %xmm0 - [-,%xmm0] v14 = load.f32 notrap v1+1 ; bin: f3 41 0f 10 44 24 01 - ; asm: movss 0x1(%r13), %xmm0 - [-,%xmm0] v15 = load.f32 notrap v2+1 ; bin: f3 41 0f 10 45 01 - - ;; Load with disp32. - ; asm: movss 0x100(%r12), %xmm0 - [-,%xmm0] v16 = load.f32 notrap v1+256 ; bin: f3 41 0f 10 84 24 00000100 - ; asm: movss 0x100(%r13), %xmm0 - [-,%xmm0] v17 = load.f32 notrap v2+256 ; bin: f3 41 0f 10 85 00000100 - - ;; Load for base+index. - ; asm: movss (%r12, %rax, 1), %xmm0 - [-,%xmm0] v18 = load_complex.f32 notrap v1+v3 ; bin: f3 41 0f 10 04 04 - ; asm: movss (%r13, %rax, 1), %xmm0 - [-,%xmm0] v19 = load_complex.f32 notrap v2+v3 ; bin: f3 41 0f 10 44 05 00 - - return -} - -function %r12_r13_stores() { -block0: - [-,%r12] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%r13] v2 = iconst.i64 0xfedc_ba98_7654_3210 - [-,%rax] v3 = iconst.i64 0x1 - [-,%xmm0] v4 = f32const 0x1.0 - - ;; Simple GPR load. 
- ; asm: movq %rax, (%r12) - store notrap v3, v1; bin: 49 89 04 24 - ; asm: movq (%r13), %rdx - store notrap v3, v2; bin: 49 89 45 00 - - ; asm: movq %rax, 0x1(%r12) - store notrap v3, v1+1; bin: 49 89 44 24 01 - ; asm: movq %rax, 0x1(%r13) - store notrap v3, v2+1; bin: 49 89 45 01 - - ; asm: movq %rax, 0x100(%r12) - store notrap v3, v1+256; bin: 49 89 84 24 00000100 - ; asm: movq %rax, 0x100(%r13) - store notrap v3, v2+256; bin: 49 89 85 00000100 - - ; asm: movq %rax, (%r12, %rax, 1) - store_complex notrap v3, v1+v3; bin: 49 89 04 04 - ; asm: movq %rax, (%r13, %rax, 1) - store_complex notrap v3, v2+v3; bin: 49 89 44 05 00 - - ; asm: movb %al, (%r12) - istore8 notrap v3, v1; bin: 41 88 04 24 - ; asm: movb %al, (%r13) - istore8 notrap v3, v2; bin: 41 88 45 00 - - ; asm: movb %al, 0x1(%r12) - istore8 notrap v3, v1+1; bin: 41 88 44 24 01 - ; asm: movb %al, 0x1(%r13) - istore8 notrap v3, v2+1; bin: 41 88 45 01 - - ; asm: movb %al, 0x100(%r12) - istore8 notrap v3, v1+256; bin: 41 88 84 24 00000100 - ; asm: movb %al, 0x100(%r13) - istore8 notrap v3, v2+256; bin: 41 88 85 00000100 - - ; asm: movb %al, (%r12, %rax, 1) - istore8_complex notrap v3, v1+v3; bin: 41 88 04 04 - ; asm: movb %al, (%r13, %rax, 1) - istore8_complex notrap v3, v2+v3; bin: 41 88 44 05 00 - - ; asm: movss %xmm0, (%r12) - store notrap v4, v1; bin: f3 41 0f 11 04 24 - ; asm: movss %xmm0, (%r13) - store notrap v4, v2; bin: f3 41 0f 11 45 00 - - ; asm: movss %xmm0, 0x1(%r12) - store notrap v4, v1+1; bin: f3 41 0f 11 44 24 01 - ; asm: movss %xmm0, 0x1(%r13) - store notrap v4, v2+1; bin: f3 41 0f 11 45 01 - - ; asm: movss %xmm0, 0x100(%r12) - store notrap v4, v1+256; bin: f3 41 0f 11 84 24 00000100 - ; asm: movss %xmm0, 0x100(%r13) - store notrap v4, v2+256; bin: f3 41 0f 11 85 00000100 - - ; asm: movss %xmm0, (%r12, %rax, 1) - store_complex notrap v4, v1+v3; bin: f3 41 0f 11 04 04 - ; asm: movss %xmm0, (%r13, %rax, 1) - store_complex notrap v4, v2+v3; bin: f3 41 0f 11 44 05 00 - - return -} - -function 
%B64() { -block0: - [-, %rax] v1 = bconst.b64 true ; bin: 40 b8 00000001 - [-, %r10] v0 = bconst.b64 true ; bin: 41 ba 00000001 - return -} - -function %V128() { -block0: - [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 - [-, %xmm9] v4 = vconst.i32x4 [0 1 2 3] ; bin: 44 0f 10 0d 0000000f PCRelRodata4(33) - store v4, v3 ; bin: heap_oob 45 0f 11 0a - - [-, %r11] v5 = iconst.i64 0x1234 - [-, %xmm2] v6 = load.i32x4 v5 ; bin: heap_oob 41 0f 10 13 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/br-i128.clif b/cranelift/filetests/filetests/isa/x86/br-i128.clif deleted file mode 100644 index fccc691aa3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/br-i128.clif +++ /dev/null @@ -1,42 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i128) -> i8 fast { -block0(v0: i128): - brz v0, block2 - ; check: v0 = iconcat v3, v4 - ; nextln: v5 = icmp_imm eq v3, 0 - ; nextln: v6 = icmp_imm eq v4, 0 - ; nextln: v7 = band v5, v6 - ; nextln: brnz v7, block2 - jump block1 - -block1: - v1 = iconst.i8 0 - return v1 - -block2: - v2 = iconst.i8 1 - return v2 -} - -function u0:1(i128) -> i8 fast { -block0(v0: i128): - brnz v0, block2 - ; check: v0 = iconcat v3, v4 - ; nextln: brnz v3, block2 - ; nextln: fallthrough block3 - - ; check: block3: - ; nextln: brnz.i64 v4, block2 - jump block1 - ; nextln: fallthrough block1 - -block1: - v1 = iconst.i8 0 - return v1 - -block2: - v2 = iconst.i8 1 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/brz-i8.clif b/cranelift/filetests/filetests/isa/x86/brz-i8.clif deleted file mode 100644 index fda005bc81..0000000000 --- a/cranelift/filetests/filetests/isa/x86/brz-i8.clif +++ /dev/null @@ -1,38 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> b1 { -block0: - v0 = iconst.i8 0 - ; check: v0 = iconst.i8 0 - brz v0, block1 - ; nextln: v3 = uextend.i32 v0 - ; nextln: brz v3, block1 - jump block2 - -block1: - v1 = bconst.b1 true - return v1 - -block2: - v2 = bconst.b1 
false - return v2 -} - -function u0:1() -> b1 { -block0: - v0 = iconst.i8 0 - ; check: v0 = iconst.i8 0 - brnz v0, block1 - ; nextln: v3 = uextend.i32 v0 - ; nextln: brnz v3, block1 - jump block2 - -block1: - v1 = bconst.b1 false - return v1 - -block2: - v2 = bconst.b1 true - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif b/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif deleted file mode 100644 index eb537d7c1a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif +++ /dev/null @@ -1,36 +0,0 @@ -test compile -target i686 legacy - -function u0:0(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iconcat v0, v1 - ; check: v6 = fill v0 - ; nextln: v3 = icmp_imm eq v6, 0 - ; nextln: v7 = fill v1 - ; nextln: v4 = icmp_imm eq v7, 0 - ; nextln: v5 = band v3, v4 - ; nextln: brnz v5, block1 - brz v2, block1 - jump block2 -block1: - trap unreachable -block2: - trap unreachable -} - -function u0:1(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iconcat v0, v1 - ; check: v3 = fill v0 - ; nextln: brnz v3, block1 - ; nextln: fallthrough block3 - ; check: block3: - ; nextln: v4 = fill.i32 v1 - ; nextln: brnz v4, block1 - brnz v2, block1 - jump block2 -block1: - trap unreachable -block2: - trap unreachable -} diff --git a/cranelift/filetests/filetests/isa/x86/extend-i128.clif b/cranelift/filetests/filetests/isa/x86/extend-i128.clif deleted file mode 100644 index e7da3f0387..0000000000 --- a/cranelift/filetests/filetests/isa/x86/extend-i128.clif +++ /dev/null @@ -1,37 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> b1 { -block0: - v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; nextln: v2 -> v0 - v1 = uextend.i128 v0 - ; nextln: v7 = iconst.i64 0 - ; nextln: v3 -> v7 - ; nextln: v1 = iconcat v0, v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 - v5 = icmp_imm eq v3, 0 - - v6 = band v4, v5 - return v6 -} - -function u0:1() -> 
b1 { -block0: - v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000 - ; nextln: v2 -> v0 - v1 = sextend.i128 v0 - ; nextln: v8 = copy v0 - ; nextln: v7 = sshr_imm v8, 63 - ; nextln: v3 -> v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000 - v5 = icmp_imm eq v3, 0xffff_ffff_ffff_ffff - v6 = band v4, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/extend-i64.clif b/cranelift/filetests/filetests/isa/x86/extend-i64.clif deleted file mode 100644 index a3d892c488..0000000000 --- a/cranelift/filetests/filetests/isa/x86/extend-i64.clif +++ /dev/null @@ -1,37 +0,0 @@ -test compile -target i686 legacy - -function u0:0() -> b1 { -block0: - v0 = iconst.i32 0xffff_ee00 - ; check: v0 = iconst.i32 0xffff_ee00 - ; nextln: v2 -> v0 - v1 = uextend.i64 v0 - ; nextln: v7 = iconst.i32 0 - ; nextln: v3 -> v7 - ; nextln: v1 = iconcat v0, v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ee00 - v5 = icmp_imm eq v3, 0 - - v6 = band v4, v5 - return v6 -} - -function u0:1() -> b1 { -block0: - v0 = iconst.i32 0xffff_ee00 - ; check: v0 = iconst.i32 0xffff_ee00 - ; nextln: v2 -> v0 - v1 = sextend.i64 v0 - ; nextln: v10 = copy v0 - ; nextln: v7 = sshr_imm v10, 31 - ; nextln: v3 -> v7 - - v2, v3 = isplit v1 - v4 = icmp_imm eq v2, 0xffff_ee00 - v5 = icmp_imm eq v3, 0xffff_ffff - v6 = band v4, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif deleted file mode 100644 index 3bc9adf5bc..0000000000 --- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif +++ /dev/null @@ -1,17 +0,0 @@ -; Check that floating-point and integer constants equal to zero are optimized correctly. 
-test binemit -target i686 legacy - -function %foo() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 - return v0 -} - -function %bar() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif deleted file mode 100644 index 6fff51c7b1..0000000000 --- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif +++ /dev/null @@ -1,31 +0,0 @@ -; Check that floating-point constants equal to zero are optimized correctly. -test binemit -target x86_64 legacy - -function %zero_const_32bit_no_rex() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0 - return v0 -} - -function %zero_const_32bit_rex() -> f32 fast { -block0: - ; asm: xorps %xmm8, %xmm8 - [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 - return v1 -} - -function %zero_const_64bit_no_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0 - return v0 -} - -function %zero_const_64bit_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm8, %xmm8 - [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif b/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif deleted file mode 100644 index eda7b6dffd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif +++ /dev/null @@ -1,25 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> i128 system_v { -block0: - v0 = iconst.i64 0 - v1 = iconst.i64 0 - v2 = iconcat v0, v1 - jump block5 - -block2: - jump block4(v27) - -block4(v23: i128): - return v23 - -block5: - v27 = bxor.i128 v2, v2 - v32 = iconst.i32 0 - brz v32, block2 - jump block6 - -block6: - trap 
user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/i128.clif b/cranelift/filetests/filetests/isa/x86/i128.clif deleted file mode 100644 index b171c0ccfd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/i128.clif +++ /dev/null @@ -1,46 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i64, i64) -> i128 fast { -block0(v0: i64, v1: i64): -;check: block0(v0: i64 [%rdi], v1: i64 [%rsi], v3: i64 [%rbp]): - - v2 = iconcat.i64 v0, v1 - ; check: regmove v0, %rdi -> %rax - ; check: regmove v1, %rsi -> %rdx - - return v2 - ; check: v4 = x86_pop.i64 - ; check: return v0, v1, v4 -} - -function u0:1(i128) -> i64, i64 fast { -block0(v0: i128): -; check: block0(v3: i64 [%rdi], v4: i64 [%rsi], v5: i64 [%rbp]): - - v1, v2 = isplit v0 - ; check: regmove v3, %rdi -> %rax - ; check: regmove v4, %rsi -> %rdx - - return v1, v2 - ; check: v6 = x86_pop.i64 - ; check: return v3, v4, v6 -} - -function u0:2(i64, i128) fast { -; check: block0(v0: i64 [%rdi], v2: i64 [%rsi], v3: i64 [%rdx], v6: i64 [%rbp]): -block0(v0: i64, v1: i128): - ; check: store v2, v0+8 - ; check: store v3, v0+16 - store v1, v0+8 - return -} - -function u0:3(i64) -> i128 fast { -block0(v0: i64): - ; check: v2 = load.i64 v0+8 - ; check: v3 = load.i64 v0+16 - v1 = load.i128 v0+8 - ; check: return v2, v3, v5 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif b/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif deleted file mode 100644 index dd75cac4a1..0000000000 --- a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif +++ /dev/null @@ -1,8 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i16) -> i8 fast { -block0(v0: i16): - v1 = ireduce.i8 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif b/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif deleted file mode 100644 index 9aedb61001..0000000000 --- 
a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif +++ /dev/null @@ -1,20 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i64, i64) -> i128 system_v { -block0(v0: i64, v1: i64): - trap user0 - -block30: - v245 = iconst.i64 0 - v246 = iconcat v245, v245 - ; The next instruction used to be legalized twice, causing a panic the second time. - v250, v251 = isplit.i128 v370 - v252, v253 = isplit v246 - trap user0 - -block45: - v369 = iconst.i64 0 - v370 = load.i128 v369 - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif deleted file mode 100644 index 948fa34d99..0000000000 --- a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -set opt_level=speed_and_size -target x86_64 legacy - -function u0:0(i8) -> i8 fast { -block0(v0: i8): - v1 = iconst.i8 0 - v2 = isub v1, v0 - ; check: uextend.i32 - ; nextln: iconst.i32 - ; nextln: isub - ; nextln: ireduce.i8 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif b/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif deleted file mode 100644 index a08356ca53..0000000000 --- a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif +++ /dev/null @@ -1,10 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i128) system_v { -block0(v0: i128): - jump block1(v0) - -block1(v1: i128): - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif deleted file mode 100644 index 9d88db9d17..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif +++ /dev/null @@ -1,10 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0() -> i8 fast { -block0: - v14 = bconst.b1 false - v15 = bint.i8 v14 - return v15 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif 
b/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif deleted file mode 100644 index acdd21c9f0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif +++ /dev/null @@ -1,28 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:51(i64, i64) system_v { - ss0 = explicit_slot 0 - ss1 = explicit_slot 1 - ss2 = explicit_slot 1 - ss3 = explicit_slot 1 - -block0(v0: i64, v1: i64): - v2 = stack_addr.i64 ss1 - v3 = load.i8 v1 - store v3, v2 - v4 = stack_addr.i64 ss2 - v5 = stack_addr.i64 ss3 - jump block1 - -block1: - v6 = load.i8 v2 - store v6, v5 - v7 = load.i8 v5 - v8 = bnot v7 - store v8, v4 - v9 = load.i8 v4 - store v9, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif deleted file mode 100644 index f64108531c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif +++ /dev/null @@ -1,46 +0,0 @@ -test legalizer - -target x86_64 legacy - -function %br_icmp(i64) fast { -block0(v0: i64): - v1 = iconst.i64 0 - br_icmp eq v0, v1, block1 - jump block1 - -block1: - return -} - -; sameln: function %br_icmp(i64 [%rdi]) fast { -; nextln: block0(v0: i64): -; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v2 = icmp eq v0, v1 -; nextln: [RexOp1t8jccb#75] brnz v2, block1 -; nextln: [Op1jmpb#eb] jump block1 -; nextln: -; nextln: block1: -; nextln: [Op1ret#c3] return -; nextln: } - - -function %br_icmp_args(i64) fast { -block0(v0: i64): - v1 = iconst.i64 0 - br_icmp eq v0, v1, block1(v0) - jump block1(v0) - -block1(v2: i64): - return -} - -; sameln: function %br_icmp_args(i64 [%rdi]) fast { -; nextln: block0(v0: i64): -; nextln: [RexOp1pu_id#b8] v1 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v3 = icmp eq v0, v1 -; nextln: [RexOp1t8jccb#75] brnz v3, block1(v0) -; nextln: [Op1jmpb#eb] jump block1(v0) -; nextln: -; nextln: block1(v2: i64): -; nextln: [Op1ret#c3] return -; nextln: } diff --git 
a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif deleted file mode 100644 index c931d6cacf..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif +++ /dev/null @@ -1,31 +0,0 @@ -test compile -set opt_level=speed_and_size -target x86_64 legacy -; regex: V=v\d+ -; regex: BB=block\d+ - -function u0:0(i64) system_v { - ss0 = explicit_slot 1 - jt0 = jump_table [block1] - -block0(v0: i64): - v1 = stack_addr.i64 ss0 - v2 = load.i8 v1 - br_table v2, block2, jt0 -; check: $(oob=$V) = ifcmp_imm $(idx=$V), 1 -; block2 is replaced by block1 by fold_redundant_jump -; nextln: brif uge $oob, block1 -; nextln: fallthrough $(inb=$BB) -; check: $inb: -; nextln: $(final_idx=$V) = uextend.i64 $idx -; nextln: $(base=$V) = jump_table_base.i64 jt0 -; nextln: $(rel_addr=$V) = jump_table_entry $final_idx, $base, 4, jt0 -; nextln: $(addr=$V) = iadd $base, $rel_addr -; nextln: indirect_jump_table_br $addr, jt0 - -block2: - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif deleted file mode 100644 index 7c135d54ae..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif +++ /dev/null @@ -1,36 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) fast { -fn0 = %black_box(i8) -ss0 = explicit_slot 1 ; black box - -block0(v0: i8, v1: i8): - v99 = stack_addr.i64 ss0 - - ; check: istore8 $(V), $(V) - - v2 = band v0, v1 - store v2, v99 - v3 = bor v0, v1 - store v3, v99 - v4 = bxor v0, v1 - store v4, v99 - v5 = bnot v0 - store v5, v99 - v6 = band_not v0, v1 - store v6, v99 - v7 = bor_not v0, v1 - store v7, v99 - v8 = bxor_not v0, v1 - store v8, v99 - v9 = band_imm v0, 42 - store v9, v99 - v10 = bor_imm v0, 42 - store v10, v99 - v11 = bxor_imm v0, 42 - store v11, v99 - return -} diff --git 
a/cranelift/filetests/filetests/isa/x86/legalize-call.clif b/cranelift/filetests/filetests/isa/x86/legalize-call.clif deleted file mode 100644 index b21099281e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-call.clif +++ /dev/null @@ -1,14 +0,0 @@ -; Test legalization of a non-colocated call in 64-bit non-PIC mode. -test legalizer -set opt_level=speed_and_size -target x86_64 legacy haswell - -function %call() { - fn0 = %foo() -block0: - call fn0() - return -} - -; check: v0 = func_addr.i64 fn0 -; nextln: call_indirect sig0, v0() diff --git a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif deleted file mode 100644 index af5e158b07..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif +++ /dev/null @@ -1,25 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8) -> i8, i8 fast { -block0(v0: i8): - v1 = clz v0 - ; check: v3 = uextend.i32 v0 - ; nextln: v6 = iconst.i32 -1 - ; nextln: v7 = iconst.i32 31 - ; nextln: v8, v9 = x86_bsr v3 - ; nextln: v10 = selectif.i32 eq v9, v6, v8 - ; nextln: v4 = isub v7, v10 - ; nextln: v5 = iadd_imm v4, -24 - ; nextln: v1 = ireduce.i8 v5 - v2 = ctz v0 - ; nextln: v11 = uextend.i32 v0 - ; nextln: v12 = bor_imm v11, 256 - ; nextln: v14 = iconst.i32 32 - ; nextln: v15, v16 = x86_bsf v12 - ; nextln: v13 = selectif.i32 eq v16, v14, v15 - ; nextln: v2 = ireduce.i8 v13 - return v1, v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif deleted file mode 100644 index 0c51e064dd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif +++ /dev/null @@ -1,133 +0,0 @@ -; Test the custom legalizations. 
-test legalizer -target i686 legacy -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %cond_trap(i32) { -block0(v1: i32): - trapz v1, user67 - return - ; check: block0(v1: i32 - ; nextln: $(f=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $f, user67 - ; nextln: return -} - -function %cond_trap2(i32) { -block0(v1: i32): - trapnz v1, int_ovf - return - ; check: block0(v1: i32 - ; nextln: $(f=$V) = ifcmp_imm v1, 0 - ; nextln: trapif ne $f, int_ovf - ; nextln: return -} - -function %cond_trap_b1(i32) { -block0(v1: i32): - v2 = icmp_imm eq v1, 6 - trapz v2, user7 - return - ; check: block0(v1: i32 - ; check: brnz v2, $(new=$BB) - ; check: jump $(trap=$BB) - ; check: $trap: - ; nextln: trap user7 - ; check: $new: - ; nextln: return -} - -function %cond_trap2_b1(i32) { -block0(v1: i32): - v2 = icmp_imm eq v1, 6 - trapnz v2, user9 - return - ; check: block0(v1: i32 - ; check: brz v2, $(new=$BB) - ; check: jump $(trap=$BB) - ; check: $trap: - ; nextln: trap user9 - ; check: $new: - ; nextln: return -} - -function %f32const() -> f32 { -block0: - v1 = f32const 0x1.0p1 - ; check: $(tmp=$V) = iconst.i32 - ; check: v1 = bitcast.f32 $tmp - return v1 -} - -function %select_f64(f64, f64, i32) -> f64 { -block0(v0: f64, v1: f64, v2: i32): - v3 = select v2, v0, v1 - ; check: brnz v2, $(new=$BB)(v0) - ; nextln: jump $new(v1) - ; check: $new(v3: f64): - ; nextln: return v3 - return v3 -} - -function %f32_min(f32, f32) -> f32 { -block0(v0: f32, v1: f32): - v2 = fmin v0, v1 - return v2 - ; check: $(vnat=$V) = x86_fmin.f32 v0, v1 - ; nextln: jump $(done=$BB)($vnat) - - ; check: $(uno=$BB): - ; nextln: $(vuno=$V) = fadd.f32 v0, v1 - ; nextln: jump $(done=$BB)($vuno) - - ; check: $(ueq=$BB): - ; check: $(veq=$V) = bor.f32 v0, v1 - ; nextln: jump $(done=$BB)($veq) - - ; check: $done(v2: f32): - ; nextln: return v2 -} - -function %ineg_legalized_i8() { -block0: - v0 = iconst.i8 1 - v1 = ineg v0 - ; check: v2 = iconst.i32 1 - ; nextln: v0 = ireduce.i8 v2 - ; nextln: v3 = 
iconst.i8 0 - ; nextln: v4 = uextend.i32 v3 - ; nextln: v5 = uextend.i32 v0 - ; nextln: v6 = isub v4, v5 - ; nextln: v1 = ireduce.i8 v6 - - return -} - -function %ineg_legalized_i16() { -block0: - v0 = iconst.i16 1 - v1 = ineg v0 - ; check: v2 = iconst.i32 1 - ; nextln: v0 = ireduce.i16 v2 - ; nextln: v3 = iconst.i16 0 - ; nextln: v4 = uextend.i32 v3 - ; nextln: v5 = uextend.i32 v0 - ; nextln: v6 = isub v4, v5 - ; nextln: v1 = ireduce.i16 v6 - - return -} - -function %ineg_legalized_i32() { -block0: - v0 = iconst.i32 1 - v1 = ineg v0 - ; check: v0 = iconst.i32 1 - ; nextln: v2 = iconst.i32 0 - ; nextln: v1 = isub v2, v0 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif deleted file mode 100644 index 9e579c1bcd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif +++ /dev/null @@ -1,192 +0,0 @@ -; Test the division legalizations. -test legalizer -; See also legalize-div.clif. 
-set avoid_div_traps=1 -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %udiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = udiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %udiv_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = udiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %udiv_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = udiv v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %urem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = urem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %urem_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = urem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %urem_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = urem v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %sdiv(i64, i64) -> i64 { -block0(v0: i64, v1: 
i64): - ; check: block0( - v2 = sdiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($q) - ; check: $m1: - ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000 - ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin - ; nextln: trapif eq $fm, int_ovf - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} - -function %sdiv_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = sdiv v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; check: $(hi=$V) = sshr_imm - ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - return v2 - ; nextln: return v2 -} - -function %sdiv_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = sdiv v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($q) - ; check: $m1: - ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000 - ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin - ; nextln: trapif eq $fm, int_ovf - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} - -; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. -; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. 
-function %srem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = srem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($r) - ; check: $m1: - ; nextln: $(zero=$V) = iconst.i64 0 - ; nextln: jump $(done=$BB)($zero) - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} - -function %srem_0(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 0 - ; nextln: v1 = iconst.i64 0 - v2 = srem v0, v1 - ; nextln: $(fz=$V) = ifcmp_imm v1, 0 - ; nextln: trapif eq $fz, int_divz - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - return v2 - ; nextln: return v2 -} - -function %srem_minus_1(i64) -> i64 { -block0(v0: i64): - ; check: block0( - v1 = iconst.i64 -1 - ; nextln: v1 = iconst.i64 -1 - v2 = srem v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($r) - ; check: $m1: - ; nextln: $(zero=$V) = iconst.i64 0 - ; nextln: jump $(done=$BB)($zero) - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div.clif b/cranelift/filetests/filetests/isa/x86/legalize-div.clif deleted file mode 100644 index b172a9aef3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-div.clif +++ /dev/null @@ -1,57 +0,0 @@ -; Test the division legalizations. -test legalizer -; See also legalize-div-traps.clif. 
-set avoid_div_traps=0 -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %udiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = udiv v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -function %urem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = urem v0, v1 - ; nextln: $(hi=$V) = iconst.i64 0 - ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 - return v2 - ; nextln: return $r -} - -function %sdiv(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = sdiv v0, v1 - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - return v2 - ; nextln: return $d -} - -; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. -; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. -function %srem(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - ; check: block0( - v2 = srem v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$BB) - ; check: $(hi=$V) = sshr_imm - ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 - ; nextln: jump $(done=$BB)($r) - ; check: $m1: - ; nextln: $(zero=$V) = iconst.i64 0 - ; nextln: jump $(done=$BB)($zero) - ; check: $done(v2: i64): - return v2 - ; nextln: return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif b/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif deleted file mode 100644 index 43f57f8372..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif +++ /dev/null @@ -1,13 +0,0 @@ -; Test the legalization of f64const. 
-test legalizer -target x86_64 legacy - -; regex: V=v\d+ - -function %f64const() -> f64 { -block0: - v1 = f64const 0x1.0p1 - ; check: $(tmp=$V) = iconst.i64 - ; check: v1 = bitcast.f64 $tmp - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif b/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif deleted file mode 100644 index 32a256c9e7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i16) -> f64 fast { -block0(v0: i16): - v1 = fcvt_from_uint.f64 v0 - return v1 -} - -function u0:1(i16) -> f64 fast { -block0(v0: i16): - v1 = fcvt_from_sint.f64 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif deleted file mode 100644 index 242a0f8dfa..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif +++ /dev/null @@ -1,123 +0,0 @@ -test legalizer -set enable_heap_access_spectre_mitigation=false -target x86_64 legacy - -; Test legalization for various forms of heap addresses. 
-; regex: BB=block\d+ - -function %heap_addrs(i32, i64, i64 vmctx) { - gv4 = vmctx - gv0 = iadd_imm.i64 gv4, 64 - gv1 = iadd_imm.i64 gv4, 72 - gv2 = iadd_imm.i64 gv4, 80 - gv3 = load.i64 notrap aligned gv4+88 - - heap0 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 - heap1 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i32 - heap2 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i64 - heap3 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i64 - heap4 = dynamic gv1, min 0x1_0000, bound gv3, offset_guard 0x8000_0000, index_type i32 - heap5 = dynamic gv1, bound gv3, offset_guard 0x1000, index_type i32 - heap6 = dynamic gv1, min 0x1_0000, bound gv2, offset_guard 0x8000_0000, index_type i64 - heap7 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i64 - - ; check: heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 - ; check: heap1 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i32 - ; check: heap2 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i64 - ; check: heap3 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i64 - ; check: heap4 = dynamic gv1, min 0x0001_0000, bound gv3, offset_guard 0x8000_0000, index_type i32 - ; check: heap5 = dynamic gv1, min 0, bound gv3, offset_guard 4096, index_type i32 - ; check: heap6 = dynamic gv1, min 0x0001_0000, bound gv2, offset_guard 0x8000_0000, index_type i64 - ; check: heap7 = dynamic gv1, min 0, bound gv2, offset_guard 4096, index_type i64 - -block0(v0: i32, v1: i64, v3: i64): - ; The fast-path; 32-bit index, static heap with a sufficient bound, no bounds check needed! 
- v4 = heap_addr.i64 heap0, v0, 0 - ; check: v12 = uextend.i64 v0 - ; check: v13 = iadd_imm v3, 64 - ; check: v4 = iadd v13, v12 - - v5 = heap_addr.i64 heap1, v0, 0 - ; check: v14 = uextend.i64 v0 - ; check: v15 = icmp_imm ugt v14, 0x0001_0000 - ; check: brz v15, $(resume_1=$BB) - ; nextln: jump $(trap_1=$BB) - ; check: $trap_1: - ; nextln: trap heap_oob - ; check: $resume_1: - ; check: v16 = iadd_imm.i64 v3, 64 - ; check: v5 = iadd v16, v14 - - v6 = heap_addr.i64 heap2, v1, 0 - ; check: v19 = iconst.i64 0x0001_0000_0000 - ; check: v17 = icmp.i64 ugt v1, v19 - ; check: brz v17, $(resume_2=$BB) - ; nextln: jump $(trap_2=$BB) - ; check: $trap_2: - ; nextln: trap heap_oob - ; check: $resume_2: - ; check: v18 = iadd_imm.i64 v3, 64 - ; check: v6 = iadd v18, v1 - - v7 = heap_addr.i64 heap3, v1, 0 - ; check: v20 = icmp_imm.i64 ugt v1, 0x0001_0000 - ; check: brz v20, $(resume_3=$BB) - ; nextln: jump $(trap_3=$BB) - ; check: $trap_3: - ; nextln: trap heap_oob - ; check: $resume_3: - ; check: v21 = iadd_imm.i64 v3, 64 - ; check: v7 = iadd v21, v1 - - v8 = heap_addr.i64 heap4, v0, 0 - ; check: v22 = uextend.i64 v0 - ; check: v23 = load.i64 notrap aligned v3+88 - ; check: v24 = iadd_imm v23, 0 - ; check: v25 = icmp ugt v22, v24 - ; check: brz v25, $(resume_4=$BB) - ; nextln: jump $(trap_4=$BB) - ; check: $trap_4: - ; nextln: trap heap_oob - ; check: $resume_4: - ; check: v26 = iadd_imm.i64 v3, 72 - ; check: v8 = iadd v26, v22 - - v9 = heap_addr.i64 heap5, v0, 0 - ; check: v27 = uextend.i64 v0 - ; check: v28 = load.i64 notrap aligned v3+88 - ; check: v29 = iadd_imm v28, 0 - ; check: v30 = icmp ugt v27, v29 - ; check: brz v30, $(resume_5=$BB) - ; nextln: jump $(trap_5=$BB) - ; check: $trap_5: - ; nextln: trap heap_oob - ; check: $resume_5: - ; check: v31 = iadd_imm.i64 v3, 72 - ; check: v9 = iadd v31, v27 - - v10 = heap_addr.i64 heap6, v1, 0 - ; check: v32 = iadd_imm.i64 v3, 80 - ; check: v33 = iadd_imm v32, 0 - ; check: v34 = icmp.i64 ugt v1, v33 - ; check: brz v34, 
$(resume_6=$BB) - ; nextln: jump $(trap_6=$BB) - ; check: $trap_6: - ; nextln: trap heap_oob - ; check: $resume_6: - ; check: v35 = iadd_imm.i64 v3, 72 - ; check: v10 = iadd v35, v1 - - v11 = heap_addr.i64 heap7, v1, 0 - ; check: v36 = iadd_imm.i64 v3, 80 - ; check: v37 = iadd_imm v36, 0 - ; check: v38 = icmp.i64 ugt v1, v37 - ; check: brz v38, $(resume_7=$BB) - ; nextln: jump $(trap_7=$BB) - ; check: $trap_7: - ; nextln: trap heap_oob - ; check: $resume_7: - ; check: v39 = iadd_imm.i64 v3, 72 - ; check: v11 = iadd v39, v1 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif deleted file mode 100644 index 276de82d4e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif +++ /dev/null @@ -1,20 +0,0 @@ -; Test the legalization of i128 instructions on x86_64. -test legalizer -target x86_64 legacy haswell - -; regex: V=v\d+ - -function %imul(i128, i128) -> i128 { -block0(v1: i128, v2: i128): - v10 = imul v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb - ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb - ; nextln: $(v13=$V) = iadd $v11, $v12 - ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = iadd $v13, $v14 - ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif deleted file mode 100644 index 7e2d381947..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif +++ /dev/null @@ -1,357 +0,0 @@ -; Test the legalization of i64 instructions on x86_32. 
-test legalizer -target i686 legacy haswell - -; regex: V=v\d+ - -function %iadd(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v10 = iadd v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V), $(carry=$V) = iadd_ifcout $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = iadd_ifcin $v1_msb, $v2_msb, $carry - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} - -function %isub(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v10 = isub v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V), $(borrow=$V) = isub_ifbout $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = isub_ifbin $v1_msb, $v2_msb, $borrow - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} - -function %imul(i64, i64) -> i64 { -block0(v1: i64, v2: i64): - v10 = imul v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb - ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb - ; nextln: $(v13=$V) = iadd $v11, $v12 - ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = iadd $v13, $v14 - ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb - ; nextln: v10 = iconcat $v10_lsb, $v10_msb - return v10 -} - -function %icmp_eq(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp eq v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb - ; nextln: v10 = band $v10_lsb, $v10_msb - return v10 -} - -function %icmp_imm_eq(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm eq v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: 
$(v2_msb=$V) = iconst.i32 0 - ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb - ; nextln: v10 = band $v10_lsb, $v10_msb - return v10 -} - -function %icmp_ne(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ne v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb - ; nextln: v10 = bor $v10_lsb, $v10_msb - return v10 -} - -function %icmp_imm_ne(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ne v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb - ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb - ; nextln: v10 = bor $v10_lsb, $v10_msb - return v10 -} - -function %icmp_sgt(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp sgt v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_sgt(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm sgt v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; 
nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_sge(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp sge v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_sge(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm sge v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_slt(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp slt v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_slt(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm slt v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = 
bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_sle(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp sle v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_sle(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm sle v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_ugt(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ugt v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_ugt(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ugt v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) 
= icmp ugt $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_uge(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp uge v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_uge(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm uge v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_ult(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ult v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_ult(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ult v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: 
$(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_ule(i64, i64) -> b1 { -block0(v1: i64, v2: i64): - v10 = icmp ule v1, v2 - ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) - ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %icmp_imm_ule(i64) -> b1 { -block0(v1: i64): - v10 = icmp_imm ule v1, 0 - ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V) - ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V) - ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V) - ; nextln: $(v2_lsb=$V) = iconst.i32 0 - ; nextln: $(v2_msb=$V) = iconst.i32 0 - ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb - ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb - ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb - ; nextln: $(c1=$V) = bnot $b2 - ; nextln: $(c2=$V) = band $c1, $b3 - ; nextln: v10 = bor $b1, $c2 - return v10 -} - -function %ineg_legalized_i64() { -block0: - v0 = iconst.i64 1 - v1 = ineg v0 - ; check: v2 = iconst.i32 1 - ; nextln: v3 = iconst.i32 0 - ; nextln: v0 = iconcat v2, v3 - ; nextln: v5 = iconst.i32 0 - ; nextln: v6 = iconst.i32 0 - ; nextln: v4 = iconcat v5, v6 - ; nextln: v7, v8 = isub_ifbout v5, v2 - ; nextln: v9 = isub_ifbin v6, v3, v8 - ; nextln: v1 = iconcat v7, v9 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif deleted file mode 100644 index 32f2b3d3e7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) -> i8 fast { 
-block0(v0: i8, v1: i8): - v2 = icmp_imm sle v0, 0 - ; check: $(e1=$V) = sextend.i32 v0 - ; nextln: v2 = icmp_imm sle $e1, 0 - v3 = bint.i8 v2 - v4 = icmp eq v0, v1 - ; check: $(e2=$V) = uextend.i32 v0 - ; nextln: $(e3=$V) = uextend.i32 v1 - ; nextln: v4 = icmp eq $e2, $e3 - v5 = bint.i8 v4 - v6 = iadd v3, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif deleted file mode 100644 index 1e6a70434a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif +++ /dev/null @@ -1,18 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64) system_v { - ss0 = explicit_slot 0 - -block0(v0: i64): - jump block1 - -block1: -; _0 = const 42u8 - v1 = iconst.i8 42 - store v1, v0 -; -; return - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif deleted file mode 100644 index b1f5b12095..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif +++ /dev/null @@ -1,11 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i8, i8) system_v { - -block0(v0: i64, v1: i8, v2: i8): - v11 = imul v1, v2 - store v11, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif deleted file mode 100644 index 4f84d93d0b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif +++ /dev/null @@ -1,15 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i8) system_v { - ss0 = explicit_slot 1 - -block0(v0: i64, v1: i8): - v3 = stack_addr.i64 ss0 - v5 = load.i8 v3 - v6 = iconst.i8 2 - v7 = imul_imm v5, 42 - store v7, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif deleted file mode 100644 index 
a36a2d6ed0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif +++ /dev/null @@ -1,13 +0,0 @@ -; Test the custom legalization of ineg.i64 on x86_64. -test legalizer -target x86_64 legacy - -function %ineg_legalized_i64() { -block0: - v0 = iconst.i64 1 - v1 = ineg v0 - ; check: v0 = iconst.i64 1 - ; nextln: v2 = iconst.i64 0 - ; nextln: v1 = isub v2, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif deleted file mode 100644 index 527710d4fe..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif +++ /dev/null @@ -1,11 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - v3 = ireduce.i64 v2 - ; check: v3 = copy v0 - ; check: return v3 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif deleted file mode 100644 index 3ad3f4c69f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif +++ /dev/null @@ -1,11 +0,0 @@ -test compile -target i686 legacy - -function u0:0(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = iconcat v0, v1 - v3 = ireduce.i32 v2 - ; check: v3 = fill v0 - ; check: return v3 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif b/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif deleted file mode 100644 index 0d042bf3ff..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif +++ /dev/null @@ -1,24 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i128) -> i64, i64 fast { -; check: block0(v4: i64 [%rdi], v5: i64 [%rsi], v8: i64 [%rbp]): -block0(v0: i128): - jump block2 - -block1: - ; When this `isplit` is legalized, the bnot below is not yet legalized, - ; so there isn't a 
corresponding `iconcat` yet. We should try legalization - ; for this `isplit` again once all instrucions have been legalized. - v2, v3 = isplit.i128 v1 - ; return v6, v7 - return v2, v3 - -block2: - ; check: v6 = bnot.i64 v4 - ; check: v2 -> v6 - ; check: v7 = bnot.i64 v5 - ; check: v3 -> v7 - v1 = bnot.i128 v0 - jump block1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif b/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif deleted file mode 100644 index 838a915bf0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif +++ /dev/null @@ -1,15 +0,0 @@ -test legalizer - -; Pre-SSE 4.1, we need to use runtime library calls for floating point rounding operations. -set is_pic -target x86_64 legacy - -function %floor(f32) -> f32 { -block0(v0: f32): - v1 = floor v0 - return v1 -} -; check: function %floor(f32 [%xmm0]) -> f32 [%xmm0] fast { -; check: sig0 = (f32 [%xmm0]) -> f32 [%xmm0] system_v -; check: fn0 = %FloorF32 sig0 -; check: v1 = call fn0(v0) diff --git a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif deleted file mode 100644 index 4cbf3e088e..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif +++ /dev/null @@ -1,31 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i8, i8) system_v { - ss0 = explicit_slot 0 - ss1 = explicit_slot 1 - ss2 = explicit_slot 1 - ss3 = explicit_slot 1 - ss4 = explicit_slot 1 - -block0(v0: i64, v1: i8, v2: i8): - v3 = stack_addr.i64 ss1 - store v1, v3 - v4 = stack_addr.i64 ss2 - store v2, v4 - v5 = stack_addr.i64 ss3 - v6 = stack_addr.i64 ss4 - jump block1 - -block1: - v7 = load.i8 v3 - store v7, v5 - v8 = load.i8 v4 - store v8, v6 - v9 = load.i8 v5 - v10 = load.i8 v6 - v11 = imul v9, v10 - store v11, v0 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif 
b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif deleted file mode 100644 index 11a0f1d20f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif +++ /dev/null @@ -1,115 +0,0 @@ -; Test the legalization of memory objects. -test legalizer -set enable_heap_access_spectre_mitigation=false -target x86_64 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -function %vmctx(i64 vmctx) -> i64 { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, -16 - -block1(v1: i64): - v2 = global_value.i64 gv1 - ; check: v2 = iadd_imm v1, -16 - return v2 - ; check: return v2 -} - -function %load(i64 vmctx) -> i64 { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0-16 - gv2 = iadd_imm.i64 gv1, 32 - -block1(v1: i64): - v2 = global_value.i64 gv2 - ; check: $(p1=$V) = load.i64 notrap aligned v1-16 - ; check: v2 = iadd_imm $p1, 32 - return v2 - ; check: return v2 -} - -function %symbol() -> i64 { - gv0 = symbol %something - gv1 = symbol u123:456 - -block1: - v0 = global_value.i64 gv0 - ; check: v0 = symbol_value.i64 gv0 - v1 = global_value.i64 gv1 - ; check: v1 = symbol_value.i64 gv1 - v2 = bxor v0, v1 - return v2 -} - -; SpiderMonkey VM-style static 4+2 GB heap. -; This eliminates bounds checks completely for offsets < 2GB. -function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 64 - heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v999: i64): - ; check: block0( - v1 = heap_addr.i64 heap0, v0, 1 - ; Boundscheck should be eliminated. - ; Checks here are assuming that no pipehole opts fold the load offsets. 
- ; nextln: $(xoff=$V) = uextend.i64 v0 - ; check: $(hbase=$V) = iadd_imm v999, 64 - ; nextln: v1 = iadd $hbase, $xoff - v2 = load.f32 v1+16 - ; nextln: v2 = load.f32 v1+16 - v3 = load.f32 v1+20 - ; nextln: v3 = load.f32 v1+20 - v4 = fadd v2, v3 - return v4 -} - -function %staticheap_static_oob_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 64 - heap0 = static gv1, min 0x1000, bound 0x1000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v999: i64): - ; Everything after the obviously OOB access should be eliminated, leaving - ; the `trap heap_oob` instruction as the terminator of the block and moving - ; the remainder of the instructions into an inaccessible block. - ; check: block0( - ; nextln: trap heap_oob - ; check: block1: - ; nextln: v1 = iconst.i64 0 - ; nextln: v2 = load.f32 v1+16 - ; nextln: return v2 - ; nextln: } - v1 = heap_addr.i64 heap0, v0, 0x1000_0001 - v2 = load.f32 v1+16 - return v2 -} - - -; SpiderMonkey VM-style static 4+2 GB heap. -; Offsets >= 2 GB do require a boundscheck. -function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 64 - heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v999: i64): - ; check: block0( - v1 = heap_addr.i64 heap0, v0, 0x8000_0000 - ; Boundscheck code - ; check: $(xoff=$V) = uextend.i64 v0 - ; check: $(oob=$V) = icmp - ; nextln: brz $oob, $(ok=$BB) - ; nextln: jump $(trap_oob=$BB) - ; check: $trap_oob: - ; nextln: trap heap_oob - ; check: $ok: - ; Checks here are assuming that no pipehole opts fold the load offsets. 
- ; check: $(hbase=$V) = iadd_imm.i64 v999, 64 - ; nextln: v1 = iadd $hbase, $xoff - v2 = load.f32 v1+0x7fff_ffff - ; nextln: v2 = load.f32 v1+0x7fff_ffff - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif b/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif deleted file mode 100644 index 179ef824f3..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif +++ /dev/null @@ -1,43 +0,0 @@ -test compile -target x86_64 legacy baseline - -; umulhi/smulhi on 64 bit operands - -function %i64_umulhi(i64, i64) -> i64 { -block0(v10: i64, v11: i64): - v12 = umulhi v10, v11 - ; check: %rdi -> %rax - ; check: x86_umulx - ; check: %rdx -> %rax - return v12 -} - -function %i64_smulhi(i64, i64) -> i64 { -block0(v20: i64, v21: i64): - v22 = smulhi v20, v21 - ; check: %rdi -> %rax - ; check: x86_smulx - ; check: %rdx -> %rax - return v22 -} - - -; umulhi/smulhi on 32 bit operands - -function %i32_umulhi(i32, i32) -> i32 { -block0(v30: i32, v31: i32): - v32 = umulhi v30, v31 - ; check: %rdi -> %rax - ; check: x86_umulx - ; check: %rdx -> %rax - return v32 -} - -function %i32_smulhi(i32, i32) -> i32 { -block0(v40: i32, v41: i32): - v42 = smulhi v40, v41 - ; check: %rdi -> %rax - ; check: x86_smulx - ; check: %rdx -> %rax - return v42 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif deleted file mode 100644 index fb9c4f49b8..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif +++ /dev/null @@ -1,9 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i8) -> i8 fast { -block0(v0: i8): - v1 = popcnt v0 - ; check-not: sextend.i32 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif deleted file mode 100644 index f770ba5643..0000000000 --- 
a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif +++ /dev/null @@ -1,36 +0,0 @@ -test compile - -target x86_64 legacy - -function u0:0(i64, i64, i64) system_v { - ss0 = explicit_slot 0 - ss1 = explicit_slot 8 - ss2 = explicit_slot 8 - ss3 = explicit_slot 2 - ss4 = explicit_slot 8 - sig0 = (i64, i16, i64) system_v - fn0 = colocated u0:11 sig0 - -block0(v0: i64, v1: i64, v2: i64): - v3 = stack_addr.i64 ss1 - store v1, v3 - v4 = stack_addr.i64 ss2 - store v2, v4 - v5 = stack_addr.i64 ss3 - v6 = stack_addr.i64 ss4 - jump block1 - -block1: - v7 = load.i64 v3 - v8 = load.i16 v7 - store v8, v5 - v9 = load.i64 v4 - store v9, v6 - v10 = load.i16 v5 - v11 = load.i64 v6 - call fn0(v0, v10, v11) - jump block2 - -block2: - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif b/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif deleted file mode 100644 index e058602615..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif +++ /dev/null @@ -1,35 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ -; regex: R=%[a-z0-9]+ - -function %i32_rotr(i32, i32) -> i32 fast { -block0(v0: i32, v1: i32): - ; check: regmove v1, $R -> %rcx - ; check: v2 = rotr v0, v1 - v2 = rotr v0, v1 - return v2 -} - -function %i32_rotr_imm_1(i32) -> i32 fast { -block0(v0: i32): - ; check: $V = rotr_imm v0, 1 - v2 = rotr_imm v0, 1 - return v2 -} - -function %i32_rotl(i32, i32) -> i32 fast { -block0(v0: i32, v1: i32): - ; check: regmove v1, $R -> %rcx - ; check: v2 = rotl v0, v1 - v2 = rotl v0, v1 - return v2 -} - -function %i32_rotl_imm_1(i32) -> i32 fast { -block0(v0: i32): - ; check: $V = rotl_imm v0, 1 - v2 = rotl_imm v0, 1 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif deleted file mode 100644 index 9759a8b155..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif +++ /dev/null @@ -1,24 +0,0 @@ 
-test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) -> i8 fast { -block0(v0: i8, v1: i8): - v2 = ishl v0, v1 - ; check: $(e1=$V) = uextend.i32 v0 - ; check: $(r1=$V) = ishl $e1, v1 - ; check v2 = ireduce.i8 $r1 - v3 = ushr v0, v1 - ; check: $(e2=$V) = uextend.i32 v0 - ; check: $(r2=$V) = ushr $e2, v1 - ; check v2 = ireduce.i8 $r2 - v4 = sshr v0, v1 - ; check: $(e3=$V) = sextend.i32 v0 - ; check: $(r3=$V) = sshr $e3, v1 - ; check v2 = ireduce.i8 $r3 - - v5 = iadd v2, v3 - v6 = iadd v4, v5 - return v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif b/cranelift/filetests/filetests/isa/x86/legalize-tables.clif deleted file mode 100644 index 10912afe76..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif +++ /dev/null @@ -1,73 +0,0 @@ -test legalizer -target x86_64 legacy - -; Test legalization for various forms of table addresses. -; regex: BB=block\d+ - -function %table_addrs(i32, i64, i64 vmctx) { - gv4 = vmctx - gv0 = iadd_imm.i64 gv4, 72 - gv1 = iadd_imm.i64 gv4, 80 - gv2 = load.i32 notrap aligned gv4+88 - - table0 = dynamic gv0, min 0x1_0000, bound gv2, element_size 1, index_type i32 - table1 = dynamic gv0, bound gv2, element_size 16, index_type i32 - table2 = dynamic gv0, min 0x1_0000, bound gv1, element_size 1, index_type i64 - table3 = dynamic gv0, bound gv1, element_size 16, index_type i64 - - ; check: table0 = dynamic gv0, min 0x0001_0000, bound gv2, element_size 1, index_type i32 - ; check: table1 = dynamic gv0, min 0, bound gv2, element_size 16, index_type i32 - ; check: table2 = dynamic gv0, min 0x0001_0000, bound gv1, element_size 1, index_type i64 - ; check: table3 = dynamic gv0, min 0, bound gv1, element_size 16, index_type i64 - -block0(v0: i32, v1: i64, v3: i64): - v4 = table_addr.i64 table0, v0, +0 - ; check: v8 = load.i32 notrap aligned v3+88 - ; check: v9 = icmp uge v0, v8 - ; check: brz v9, $(resume_1=$BB) - ; nextln: jump $(trap_1=$BB) - ; check: $trap_1: - ; nextln: 
trap table_oob - ; check: $resume_1: - ; check: v10 = uextend.i64 v0 - ; check: v11 = iadd_imm.i64 v3, 72 - ; check: v4 = iadd v11, v10 - - v5 = table_addr.i64 table1, v0, +0 - ; check: v12 = load.i32 notrap aligned v3+88 - ; check: v13 = icmp.i32 uge v0, v12 - ; check: brz v13, $(resume_2=$BB) - ; nextln: jump $(trap_2=$BB) - ; check: $trap_2: - ; nextln: trap table_oob - ; check: $resume_2: - ; check: v14 = uextend.i64 v0 - ; check: v15 = iadd_imm.i64 v3, 72 - ; check: v16 = ishl_imm v14, 4 - ; check: v5 = iadd v15, v16 - - v6 = table_addr.i64 table2, v1, +0 - ; check: v17 = iadd_imm.i64 v3, 80 - ; check: v18 = icmp.i64 uge v1, v17 - ; check: brz v18, $(resume_3=$BB) - ; nextln: jump $(trap_3=$BB) - ; check: $trap_3: - ; nextln: trap table_oob - ; check: $resume_3: - ; check: v19 = iadd_imm.i64 v3, 72 - ; check: v6 = iadd v19, v1 - - v7 = table_addr.i64 table3, v1, +0 - ; check: v20 = iadd_imm.i64 v3, 80 - ; check: v21 = icmp.i64 uge v1, v20 - ; check: brz v21, $(resume_4=$BB) - ; nextln: jump $(trap_4=$BB) - ; check: $trap_4: - ; nextln: trap table_oob - ; check: $resume_4: - ; check: v22 = iadd_imm.i64 v3, 72 - ; check: v23 = ishl_imm.i64 v1, 4 - ; check: v7 = iadd v22, v23 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif deleted file mode 100644 index 7be308308c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif +++ /dev/null @@ -1,15 +0,0 @@ -test compile -target x86_64 legacy - -; regex: V=v\d+ - -function u0:0(i8, i8) -> i8 fast { -block0(v0: i8, v1: i8): - v2 = urem v0, v1 - ; check: $(a=$V) = uextend.i32 v0 - ; nextln: $(b=$V) = uextend.i32 v1 - ; nextln: $(c=$V) = iconst.i32 0 - ; nextln: $(V), $(r=$V) = x86_udivmodx $a, $c, $b - ; nextln: v2 = ireduce.i8 $r - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif b/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif deleted file 
mode 100644 index ff5d11a4d7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif +++ /dev/null @@ -1,51 +0,0 @@ -test compile -set enable_simd -target i686 legacy haswell - -function u0:1(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i32 notrap aligned v0+16 - v3 = ishl v1, v2 - return v3 -} - -function u0:2(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ishl v1, v2 - return v3 -} - -function u0:3(i32) -> i32 system_v { - block1(v0: i32): - v1 = load.i32 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ishl v1, v2 - return v3 -} - -function u0:4(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i32 notrap aligned v0+16 - v3 = ushr v1, v2 - return v3 -} - -function u0:5(i32) -> i64 system_v { - block1(v0: i32): - v1 = load.i64 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ushr v1, v2 - return v3 -} - -function u0:6(i32) -> i32 system_v { - block1(v0: i32): - v1 = load.i32 notrap aligned v0+0 - v2 = load.i64 notrap aligned v0+16 - v3 = ushr v1, v2 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif b/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif deleted file mode 100644 index 4e0af65c9f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif +++ /dev/null @@ -1,16 +0,0 @@ -test compile -target i686 legacy - -function u0:0(i64, i32) system_v { -block0(v0: i64, v1: i32): - v2 = bor v0, v0 - store v2, v1 - return -} - -function u0:1(i32) -> i64 system_v { -block0(v1: i32): - v0 = load.i64 v1 - v2 = bor v0, v0 - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/nop.clif b/cranelift/filetests/filetests/isa/x86/nop.clif deleted file mode 100644 index cafa90eb4f..0000000000 --- a/cranelift/filetests/filetests/isa/x86/nop.clif +++ /dev/null @@ -1,10 +0,0 @@ -test 
compile - -target x86_64 legacy - -function %test(i32) -> i32 system_v { -block0(v0: i32): - nop - v1 = iconst.i32 42 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif deleted file mode 100644 index b5a9658b67..0000000000 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif +++ /dev/null @@ -1,52 +0,0 @@ -; Check that floating-point and integer constants equal to zero are optimized correctly. -test binemit -set opt_level=speed_and_size -target i686 legacy - -function %foo() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 - return v0 -} - -function %bar() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 - return v1 -} - -function %zero_dword() -> i32 fast { -block0: - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i32 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i32 0 ; bin: 31 ff - return v0 -} - -function %zero_word() -> i16 fast { -block0: - ; while you may expect this to be encoded like 6631c0, aka - ; xor %ax, %ax, the upper 16 bits of the register used for - ; i16 are left undefined, so it's not wrong to clear them. - ; - ; discarding the 66 prefix is shorter, so this test expects - ; that we do so. 
- ; - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff - return v0 -} - -function %zero_byte() -> i8 fast { -block0: - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i8 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i8 0 ; bin: 31 ff - return v0 -} diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif deleted file mode 100644 index 8e469b8b7a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif +++ /dev/null @@ -1,72 +0,0 @@ -; Check that floating-point constants equal to zero are optimized correctly. -test binemit -set opt_level=speed_and_size -target x86_64 legacy - -function %zero_const_32bit_no_rex() -> f32 fast { -block0: - ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 - return v0 -} - -function %zero_const_32bit_rex() -> f32 fast { -block0: - ; asm: xorps %xmm8, %xmm8 - [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 - return v1 -} - -function %zero_const_64bit_no_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v0 = f64const 0.0 ; bin: 66 0f 57 c0 - return v0 -} - -function %zero_const_64bit_rex() -> f64 fast { -block0: - ; asm: xorpd %xmm8, %xmm8 - [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 - return v1 -} - -function %imm_zero_register() -> i64 fast { -block0: - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i64 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i64 0 ; bin: 31 ff - ; asm: xor %r8, r8 - [-,%r8] v2 = iconst.i64 0 ; bin: 45 31 c0 - ; asm: xor %r15, %r15 - [-,%r15] v4 = iconst.i64 0 ; bin: 45 31 ff - return v0 -} - -function %zero_word() -> i16 fast { -block0: - ; while you may expect this to be encoded like 6631c0, aka - ; xor %ax, %ax, the upper 16 bits of the register used for - ; i16 are left undefined, so it's not wrong to clear them. 
- ; - ; discarding the 66 prefix is shorter, so this test expects - ; that we do so. - ; - ; asm: xor %eax, %eax - [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff - return v0 -} - -function %zero_byte() -> i8 fast { -block0: - ; asm: xor %r8d, %r8d - [-,%r15] v0 = iconst.i8 0 ; bin: 45 31 ff - ; asm: xor %eax, eax - [-,%rax] v1 = iconst.i8 0 ; bin: 31 c0 - ; asm: xor %edi, %edi - [-,%rdi] v2 = iconst.i8 0 ; bin: 31 ff - return v0 -} diff --git a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif b/cranelift/filetests/filetests/isa/x86/pinned-reg.clif deleted file mode 100644 index b9bc230c33..0000000000 --- a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif +++ /dev/null @@ -1,74 +0,0 @@ -test compile - -set enable_pinned_reg=true -set use_pinned_reg_as_heap_base=true -set opt_level=speed_and_size - -target x86_64 legacy - -; regex: V=v\d+ - -; r15 is the pinned heap register. It must not be rewritten, so it must not be -; used as a tied output register. -function %tied_input() -> i64 system_v { -block0: - v1 = get_pinned_reg.i64 - v2 = iadd_imm v1, 42 - return v2 -} - -; check: ,%r15] -; sameln: v1 = get_pinned_reg.i64 -; nextln: regmove v1, %r15 -> %rax -; nextln: ,%rax] -; sameln: iadd_imm v1, 42 - -;; It musn't be used even if this is a tied input used twice. 
-function %tied_twice() -> i64 system_v { -block0: - v1 = get_pinned_reg.i64 - v2 = iadd v1, v1 - return v2 -} - -; check: ,%r15] -; sameln: v1 = get_pinned_reg.i64 -; nextln: regmove v1, %r15 -> %rax -; nextln: ,%rax] -; sameln: iadd v1, v1 - -function %uses() -> i64 system_v { -block0: - v1 = get_pinned_reg.i64 - v2 = iadd_imm v1, 42 - v3 = get_pinned_reg.i64 - v4 = iadd v2, v3 - return v4 -} - -; check: ,%r15] -; sameln: v1 = get_pinned_reg.i64 -; nextln: regmove v1, %r15 -> %rax -; nextln: ,%rax] -; sameln: iadd_imm v1, 42 -; nextln: ,%r15 -; sameln: v3 = get_pinned_reg.i64 -; nextln: ,%rax] -; sameln: iadd v2, v3 - -; When the pinned register is used as the heap base, the final load instruction -; must use the %r15 register, since x86 implements the complex addressing mode. -function u0:1(i64 vmctx) -> i64 system_v { - gv0 = vmctx - heap0 = static gv0, min 0x000a_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 - -block0(v42: i64): - v5 = iconst.i32 42 - v6 = heap_addr.i64 heap0, v5, 0 - v7 = load.i64 v6 - return v7 -} - -; check: ,%r15] -; sameln: $(heap_base=$V) = get_pinned_reg.i64 -; nextln: load_complex.i64 $heap_base+ diff --git a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif b/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif deleted file mode 100644 index 4b4a05244c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif +++ /dev/null @@ -1,28 +0,0 @@ -test compile -set use_colocated_libcalls=1 -set probestack_func_adjusts_sp=1 -target x86_64 legacy - -; Like %big in probestack.clif, but with the probestack function adjusting -; the stack pointer itself. 
- -function %big() system_v { - ss0 = explicit_slot 300000 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 300000, offset -300016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) probestack -; nextln: fn0 = colocated %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 -; nextln: [Op1call_id#e8] call fn0(v1) -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 -; nextln: [RexOp1popq#58,%rbp] v2 = x86_pop.i64 -; nextln: [Op1ret#c3] return v2 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif b/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif deleted file mode 100644 index 6b9b4f3342..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif +++ /dev/null @@ -1,24 +0,0 @@ -test compile -set use_colocated_libcalls=1 -set enable_probestack=0 -target x86_64 legacy - -; Like %big in probestack.clif, but with probes disabled. 
- -function %big() system_v { - ss0 = explicit_slot 300000 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 300000, offset -300016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 0x0004_93e0 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif b/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif deleted file mode 100644 index 2837ddd0c9..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif +++ /dev/null @@ -1,27 +0,0 @@ -test compile -target x86_64 legacy - -; Like %big in probestack.clif, but without a colocated libcall. 
- -function %big() system_v { - ss0 = explicit_slot 300000 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 300000, offset -300016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack -; nextln: fn0 = %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 0x0004_93e0 -; nextln: [RexOp1fnaddr8#80b8,%r11] v2 = func_addr.i64 fn0 -; nextln: [RexOp1call_r#20ff,%rax] v3 = call_indirect sig0, v2(v1) -; nextln: [RexOp1adjustsp#8029] adjust_sp_down v3 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 0x0004_93e0 -; nextln: [RexOp1popq#58,%rbp] v4 = x86_pop.i64 -; nextln: [Op1ret#c3] return v4 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack-size.clif b/cranelift/filetests/filetests/isa/x86/probestack-size.clif deleted file mode 100644 index efb1900170..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack-size.clif +++ /dev/null @@ -1,74 +0,0 @@ -test compile -set use_colocated_libcalls=1 -set probestack_size_log2=13 -target x86_64 legacy - -; Like %big in probestack.clif, but now the probestack size is bigger -; and it no longer needs a probe. 
- -function %big() system_v { - ss0 = explicit_slot 4097 -block0: - return -} - -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 4097, offset -4113 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4112 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } - - -; Like %big; still doesn't need a probe. - -function %bigger() system_v { - ss0 = explicit_slot 8192 -block0: - return -} - -; check: function %bigger(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 8192, offset -8208 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 8192 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8192 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } - - -; Like %bigger; this needs a probe. 
- -function %biggest() system_v { - ss0 = explicit_slot 8193 -block0: - return -} - -; check: function %biggest(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 8193, offset -8209 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack -; nextln: fn0 = colocated %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 8208 -; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) -; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 8208 -; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 -; nextln: [Op1ret#c3] return v3 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/probestack.clif b/cranelift/filetests/filetests/isa/x86/probestack.clif deleted file mode 100644 index c434cf5f63..0000000000 --- a/cranelift/filetests/filetests/isa/x86/probestack.clif +++ /dev/null @@ -1,49 +0,0 @@ -test compile -set use_colocated_libcalls=1 -target x86_64 legacy - -; A function with a big stack frame. This should have a stack probe. 
- -function %big() system_v { - ss0 = explicit_slot 4097 -block0: - return -} -; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 4097, offset -4113 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (i64 [%rax]) -> i64 [%rax] probestack -; nextln: fn0 = colocated %Probestack sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pu_id#b8,%rax] v1 = iconst.i64 4112 -; nextln: [Op1call_id#e8,%rax] v2 = call fn0(v1) -; nextln: [RexOp1adjustsp#8029] adjust_sp_down v2 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4112 -; nextln: [RexOp1popq#58,%rbp] v3 = x86_pop.i64 -; nextln: [Op1ret#c3] return v3 -; nextln: } - - -; A function with a small enough stack frame. This shouldn't have a stack probe. - -function %small() system_v { - ss0 = explicit_slot 4096 -block0: - return -} - -; check: function %small(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { -; nextln: ss0 = explicit_slot 4096, offset -4112 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v0 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_id#d081] adjust_sp_down_imm 4096 -; nextln: [RexOp1adjustsp_id#8081] adjust_sp_up_imm 4096 -; nextln: [RexOp1popq#58,%rbp] v1 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif deleted file mode 100644 index 831928186b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif +++ /dev/null @@ -1,314 +0,0 @@ -test compile -set opt_level=speed_and_size -set is_pic -set enable_probestack=false -target x86_64 legacy haswell - -; An empty function. 
- -function %empty() { -block0: - return -} - -; check: function %empty(i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: x86_push v0 -; nextln: copy_special %rsp -> %rbp -; nextln: v1 = x86_pop.i64 -; nextln: return v1 -; nextln: } - -; A function with a single stack slot. - -function %one_stack_slot() { - ss0 = explicit_slot 168 -block0: - return -} - -; check: function %one_stack_slot(i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 168, offset -184 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: x86_push v0 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 176 -; nextln: adjust_sp_up_imm 176 -; nextln: v1 = x86_pop.i64 -; nextln: return v1 -; nextln: } - -; A function performing a call. - -function %call() { - fn0 = %foo() - -block0: - call fn0() - return -} - -; check: function %call(i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = incoming_arg 16, offset -16 -; nextln: sig0 = () fast -; nextln: fn0 = %foo sig0 -; nextln: -; nextln: block0(v0: i64 [%rbp]): -; nextln: x86_push v0 -; nextln: copy_special %rsp -> %rbp -; nextln: call fn0() -; nextln: v1 = x86_pop.i64 -; nextln: return v1 -; nextln: } - -; A function that uses a lot of registers but doesn't quite need to spill. 
- -function %no_spill(i64, i64) { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - return -} - -; check: function %no_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast { -; nextln: ss0 = incoming_arg 56, offset -56 -; nextln: -; nextln: block0(v0: i64 [%rdi], v1: i64 [%rsi], v15: i64 [%rbp], v16: i64 [%rbx], v17: i64 [%r12], v18: i64 [%r13], v19: i64 [%r14], v20: i64 [%r15]): -; nextln: x86_push v15 -; nextln: copy_special %rsp -> %rbp -; nextln: x86_push v16 -; nextln: x86_push v17 -; nextln: x86_push v18 -; nextln: x86_push v19 -; nextln: x86_push v20 -; nextln: v2 = load.i32 v0 -; nextln: v3 = load.i32 v0+8 -; nextln: v4 = load.i32 v0+16 -; nextln: v5 = load.i32 v0+24 -; nextln: v6 = load.i32 v0+32 -; nextln: v7 = load.i32 v0+40 -; nextln: v8 = load.i32 v0+48 -; nextln: v9 = load.i32 v0+56 -; nextln: v10 = load.i32 v0+64 -; nextln: v11 = load.i32 v0+72 -; nextln: v12 = load.i32 v0+80 -; nextln: v13 = load.i32 v0+88 -; nextln: v14 = load.i32 v0+96 -; nextln: store v2, v1 -; nextln: store v3, v1+8 -; nextln: store v4, v1+16 -; nextln: store v5, v1+24 -; nextln: store v6, v1+32 -; nextln: store v7, v1+40 -; nextln: store v8, v1+48 -; nextln: store v9, v1+56 -; nextln: store v10, v1+64 -; nextln: store v11, v1+72 -; nextln: store v12, v1+80 -; 
nextln: store v13, v1+88 -; nextln: store v14, v1+96 -; nextln: v26 = x86_pop.i64 -; nextln: v25 = x86_pop.i64 -; nextln: v24 = x86_pop.i64 -; nextln: v23 = x86_pop.i64 -; nextln: v22 = x86_pop.i64 -; nextln: v21 = x86_pop.i64 -; nextln: return v21, v22, v23, v24, v25, v26 -; nextln: } - -; This function requires too many registers and must spill. - -function %yes_spill(i64, i64) { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - v15 = load.i32 v0+104 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - store.i32 v15, v1+104 - return -} - -; check: function %yes_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast { -; check: ss0 = spill_slot - -; check: block0(v16: i64 [%rdi], v17: i64 [%rsi], v48: i64 [%rbp], v49: i64 [%rbx], v50: i64 [%r12], v51: i64 [%r13], v52: i64 [%r14], v53: i64 [%r15]): -; nextln: x86_push v48 -; nextln: copy_special %rsp -> %rbp -; nextln: x86_push v49 -; nextln: x86_push v50 -; nextln: x86_push v51 -; nextln: x86_push v52 -; nextln: x86_push v53 -; nextln: adjust_sp_down_imm - -; check: spill - -; check: fill - -; check: adjust_sp_up_imm -; nextln: v59 = x86_pop.i64 -; nextln: v58 = x86_pop.i64 -; nextln: v57 = x86_pop.i64 -; nextln: v56 = x86_pop.i64 -; nextln: v55 = x86_pop.i64 -; nextln: v54 = x86_pop.i64 -; nextln: return v54, v55, v56, v57, v58, v59 -; nextln: } - 
-; A function which uses diverted registers. - -function %divert(i32) -> i32 system_v { -block0(v0: i32): - v2 = iconst.i32 0 - v3 = iconst.i32 1 - jump block1(v0, v3, v2) - -block1(v4: i32, v5: i32, v6: i32): - brz v4, block3 - jump block2 - -block2: - v7 = iadd v5, v6 - v8 = iadd_imm v4, -1 - jump block1(v8, v7, v5) - -block3: - return v5 -} - -; check: function %divert -; check: regmove.i32 v5, %rcx -> %rbx -; check: [Op1popq#58,%rbx] v15 = x86_pop.i64 - -; Stack limit checking - -function %stack_limit(i64 stack_limit) { - ss0 = explicit_slot 168 -block0(v0: i64): - return -} - -; check: function %stack_limit(i64 stack_limit [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 168, offset -184 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rdi], v4: i64 [%rbp]): -; nextln: v1 = copy v0 -; nextln: v2 = iadd_imm v1, 176 -; nextln: v3 = ifcmp_sp v2 -; nextln: trapif uge v3, stk_ovf -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 176 -; nextln: adjust_sp_up_imm 176 -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -function %big_stack_limit(i64 stack_limit) { - ss0 = explicit_slot 40000 -block0(v0: i64): - return -} - -; check: function %big_stack_limit(i64 stack_limit [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 40000, offset -40016 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: -; nextln: block0(v0: i64 [%rdi], v5: i64 [%rbp]): -; nextln: v1 = copy v0 -; nextln: v2 = ifcmp_sp v1 -; nextln: trapif uge v2, stk_ovf -; nextln: v3 = iadd_imm v1, 0x9c40 -; nextln: v4 = ifcmp_sp v3 -; nextln: trapif uge v4, stk_ovf -; nextln: x86_push v5 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 0x9c40 -; nextln: adjust_sp_up_imm 0x9c40 -; nextln: v6 = x86_pop.i64 -; nextln: return v6 -; nextln: } - -function %limit_preamble(i64 vmctx) { - gv0 = vmctx - gv1 = load.i64 notrap aligned gv0 - gv2 = load.i64 
notrap aligned gv1+4 - stack_limit = gv2 - ss0 = explicit_slot 20 -block0(v0: i64): - return -} - -; check: function %limit_preamble(i64 vmctx [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast { -; nextln: ss0 = explicit_slot 20, offset -36 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: gv0 = vmctx -; nextln: gv1 = load.i64 notrap aligned gv0 -; nextln: gv2 = load.i64 notrap aligned gv1+4 -; nextln: stack_limit = gv2 -; nextln: -; nextln: block0(v0: i64 [%rdi], v5: i64 [%rbp]): -; nextln: v1 = load.i64 notrap aligned v0 -; nextln: v2 = load.i64 notrap aligned v1+4 -; nextln: v3 = iadd_imm v2, 32 -; nextln: v4 = ifcmp_sp v3 -; nextln: trapif uge v4, stk_ovf -; nextln: x86_push v5 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 32 -; nextln: adjust_sp_up_imm 32 -; nextln: v6 = x86_pop.i64 -; nextln: return v6 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/relax_branch.clif b/cranelift/filetests/filetests/isa/x86/relax_branch.clif deleted file mode 100644 index 0e123f8a36..0000000000 --- a/cranelift/filetests/filetests/isa/x86/relax_branch.clif +++ /dev/null @@ -1,132 +0,0 @@ -test binemit -set opt_level=speed_and_size -set avoid_div_traps -set baldrdash_prologue_words=3 -set emit_all_ones_funcaddrs -set enable_probestack=false -target x86_64 legacy haswell - -; This checks that a branch that is too far away is getting relaxed. In -; particular, the first block has to be non-empty but its encoding size must be -; zero (i.e. not generate any code). See also issue #666 for more details. 
- -function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] baldrdash_system_v { - ss0 = incoming_arg 24, offset -24 - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 48 - gv2 = load.i64 notrap aligned readonly gv0 - heap0 = static gv2, min 0xd839_6000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 - - block0(v0: i32 [%rdi], v1: i32 [%rsi], v2: i64 [%r14]): -@0005 [-] fallthrough block3(v0, v1) - - block3(v8: i32 [%rdi], v19: i32 [%rsi]): -@0005 [RexOp1ldDisp8#808b,%rax] v7 = load.i64 v2+48 -@0005 [RexOp1rcmp_ib#f083,%rflags] v91 = ifcmp_imm v7, 0 -@0005 [trapif#00] trapif ne v91, interrupt -[DynRexOp1umr#89,%rax] v105 = copy v8 -@000b [DynRexOp1r_ib#83,%rax] v10 = iadd_imm v105, 1 - v80 -> v10 -@0010 [Op1umr#89,%rcx] v92 = uextend.i64 v8 -@0010 [RexOp1ld#808b,%rdx] v93 = load.i64 notrap aligned readonly v2 - v95 -> v93 -@0010 [Op2ldWithIndex#4be,%rcx] v12 = sload8_complex.i32 v93+v92 -[DynRexOp1umr#89,%rbx] v106 = copy v12 -@0017 [DynRexOp1r_ib#40c1,%rbx] v14 = ishl_imm v106, 24 -@001a [DynRexOp1r_ib#70c1,%rbx] v16 = sshr_imm v14, 24 -[DynRexOp1umr#89,%rdi] v107 = copy v16 -@001f [DynRexOp1r_ib#83,%rdi] v18 = iadd_imm v107, 32 -[DynRexOp1umr#89,%r8] v108 = copy v19 -@0026 [DynRexOp1r_ib#83,%r8] v21 = iadd_imm v108, 1 - v82 -> v21 -@002b [Op1umr#89,%rsi] v94 = uextend.i64 v19 -@002b [Op2ldWithIndex#4be,%rdx] v23 = sload8_complex.i32 v93+v94 - v55 -> v23 -[DynRexOp1umr#89,%rsi] v109 = copy v23 -@0032 [DynRexOp1r_ib#40c1,%rsi] v25 = ishl_imm v109, 24 -@0035 [DynRexOp1r_ib#70c1,%rsi] v27 = sshr_imm v25, 24 - v69 -> v27 -[DynRexOp1umr#89,%r9] v110 = copy v27 -@003a [DynRexOp1r_ib#83,%r9] v29 = iadd_imm v110, 32 - v68 -> v29 -@0042 [DynRexOp1r_ib#83,%rcx] v31 = iadd_imm v12, -65 -@0045 [DynRexOp1r_ib#40c1,%rcx] v33 = ishl_imm v31, 24 -@0048 [DynRexOp1r_ib#70c1,%rcx] v35 = sshr_imm v33, 24 -@004c [DynRexOp1r_id#4081,%rcx] v37 = band_imm v35, 255 -[DynRexOp1rcmp_ib#7083,%rflags] v97 = ifcmp_imm v37, 26 -@0050 [Op1brib#70] brif sge v97, block6 
-@0050 [-] fallthrough block10 - - block10: -[DynRexOp1umr#89,%rcx] v101 = copy v18 -@0054 [Op1jmpb#eb] jump block5(v18, v101) - - block6: -[DynRexOp1umr#89,%rcx] v102 = copy.i32 v16 -@0059 [RexOp1rmov#89] regmove v102, %rcx -> %rdi -@0059 [RexOp1rmov#89] regmove.i32 v16, %rbx -> %rcx -@0059 [-] fallthrough block5(v102, v16) - - block5(v41: i32 [%rdi], v84: i32 [%rcx]): - v83 -> v84 -@005d [DynRexOp1r_id#4081,%rdi] v43 = band_imm v41, 255 -@0062 [DynRexOp1r_ib#40c1,%rdi] v45 = ishl_imm v43, 24 - v52 -> v45 -@0065 [RexOp1rmov#89] regmove v45, %rdi -> %rbx -@0065 [DynRexOp1r_ib#70c1,%rbx] v47 = sshr_imm v45, 24 - v54 -> v47 -@0068 [RexOp1rmov#89] regmove v47, %rbx -> %rdi -@0068 [DynRexOp1icscc_ib#7083,%rbx] v49 = icmp_imm ne v47, 0 -@0068 [RexOp2urm_noflags#4b6,%r10] v50 = bint.i32 v49 -@0076 [DynRexOp1r_ib#83,%rdx] v57 = iadd_imm.i32 v23, -65 -@0079 [DynRexOp1r_ib#40c1,%rdx] v59 = ishl_imm v57, 24 -@007c [DynRexOp1r_ib#70c1,%rdx] v61 = sshr_imm v59, 24 -@0080 [DynRexOp1r_id#4081,%rdx] v63 = band_imm v61, 255 -[DynRexOp1rcmp_ib#7083,%rflags] v98 = ifcmp_imm v63, 26 -@0084 [RexOp1rmov#89] regmove v47, %rdi -> %rbx -@0084 [Op1brib#70] brif sge v98, block8 -@0084 [-] fallthrough block11 - - block11: -[DynRexOp1umr#89,%rdx] v103 = copy.i32 v29 -@0088 [Op1jmpb#eb] jump block7(v29, v10, v21, v103) - - block8: -[DynRexOp1umr#89,%rdx] v104 = copy.i32 v27 -@008d [RexOp1rmov#89] regmove v104, %rdx -> %r9 -@008d [RexOp1rmov#89] regmove.i32 v27, %rsi -> %rdx -@008d [-] fallthrough block7(v104, v10, v21, v27) - - block7(v67: i32 [%r9], v79: i32 [%rax], v81: i32 [%r8], v87: i32 [%rdx]): -@0091 [DynRexOp1r_id#4081,%r9] v71 = band_imm v67, 255 -@0094 [DynRexOp1r_ib#40c1,%r9] v73 = ishl_imm v71, 24 -@0097 [DynRexOp1r_ib#70c1,%r9] v75 = sshr_imm v73, 24 -@0098 [DynRexOp1icscc#39,%rbx] v76 = icmp.i32 eq v47, v75 -@0098 [Op2urm_noflags_abcd#4b6,%rbx] v77 = bint.i32 v76 -@0099 [DynRexOp1rr#21,%r10] v78 = band.i32 v50, v77 -@009a [RexOp1tjccb#74] brz v78, block9 -@009a [-] fallthrough 
block12 - - block12: -[DynRexOp1umr#89,%rcx] v99 = copy v81 -[DynRexOp1umr#89,%rdx] v100 = copy v79 -@00a4 [RexOp1rmov#89] regmove v100, %rdx -> %rdi -@00a4 [RexOp1rmov#89] regmove v99, %rcx -> %rsi -@00a4 [Op1jmpd#e9] jump block3(v100, v99); bin: e9 ffffff2d - - block9: -@00a7 [-] fallthrough block4 - - block4: -@00ad [DynRexOp1r_id#4081,%rcx] v86 = band_imm.i32 v84, 255 -@00b3 [DynRexOp1r_id#4081,%rdx] v89 = band_imm.i32 v87, 255 -@00b4 [DynRexOp1rr#29,%rcx] v90 = isub v86, v89 -@00b5 [-] fallthrough block2(v90) - - block2(v5: i32 [%rcx]): -@00b6 [-] fallthrough block1(v5) - - block1(v3: i32 [%rcx]): -@00b6 [Op1umr#89,%rax] v96 = uextend.i64 v3 -@00b6 [-] fallthrough_return v96 -} diff --git a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif b/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif deleted file mode 100644 index a26e2d865c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif +++ /dev/null @@ -1,13 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0() -> f32 system_v { -block0: - v0 = iconst.i8 255 -; check: v2 = iconst.i32 255 -; nextln: v0 = ireduce.i8 v2 - v1 = fcvt_from_uint.f32 v0 -; nextln: v3 = uextend.i64 v0 -; nextln: v1 = fcvt_from_sint.f32 v3 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/select-i8.clif b/cranelift/filetests/filetests/isa/x86/select-i8.clif deleted file mode 100644 index 44b7e32d12..0000000000 --- a/cranelift/filetests/filetests/isa/x86/select-i8.clif +++ /dev/null @@ -1,8 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(b1, i8, i8) -> i8 { -block0(v0: b1, v1: i8, v2: i8): - v3 = select v0, v1, v2 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif deleted file mode 100644 index 31b73da391..0000000000 --- a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif +++ /dev/null @@ -1,18 +0,0 @@ -test shrink -set 
opt_level=speed_and_size -target x86_64 legacy - -function %test_multiple_uses(i32 [%rdi]) -> i32 { -block0(v0: i32 [%rdi]): -[DynRexOp1rcmp_ib#7083,%rflags] v3 = ifcmp_imm v0, 0 -[Op2seti_abcd#490,%rax] v1 = trueif eq v3 -[RexOp2urm_noflags#4b6,%rax] v2 = bint.i32 v1 -[Op1brib#70] brif eq v3, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v2 - -block1: -[Op2trap#40b] trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/shrink.clif b/cranelift/filetests/filetests/isa/x86/shrink.clif deleted file mode 100644 index bb787832c9..0000000000 --- a/cranelift/filetests/filetests/isa/x86/shrink.clif +++ /dev/null @@ -1,40 +0,0 @@ -test binemit -set opt_level=speed_and_size -target x86_64 legacy - -; Test that instruction shrinking eliminates REX prefixes when possible. - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/shrink.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %test_shrinking(i32) -> i32 { -block0(v0: i32 [ %rdi ]): - ; asm: movl $0x2,%eax -[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002 - ; asm: subl %ecx,%edi -[-,%rdi] v2 = isub v0, v1 ; bin: 29 cf - return v2 -} - -function %test_not_shrinking(i32) -> i32 { -block0(v0: i32 [ %r8 ]): - ; asm: movl $0x2,%eax -[-,%rcx] v1 = iconst.i32 2 ; bin: b9 00000002 - ; asm: subl %ecx,%edi -[-,%r8] v2 = isub v0, v1 ; bin: 41 29 c8 - return v2 -} - -function %test_not_shrinking_i8() { -block0: -[-,%rsi] v1 = iconst.i8 1 - ; asm: movsbl %sil,%esi -[-,%rsi] v2 = sextend.i32 v1 ; bin: 40 0f be f6 - ; asm: movzbl %sil,%esi -[-,%rsi] v3 = uextend.i32 v1 ; bin: 40 0f b6 f6 - ; asm: movzbl %sil,%esi -[-,%rsi] v4 = uextend.i64 v1 ; bin: 40 0f b6 f6 - trap user0 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif deleted file mode 100644 index 0a8fbe7f0c..0000000000 --- 
a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif +++ /dev/null @@ -1,116 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %arithmetic_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]): -[-, %xmm6] v2 = iadd v0, v1 ; bin: 66 0f fc f2 -[-, %xmm6] v3 = isub v0, v1 ; bin: 66 0f f8 f2 -[-, %xmm6] v4 = sadd_sat v0, v1 ; bin: 66 0f ec f2 -[-, %xmm6] v5 = ssub_sat v0, v1 ; bin: 66 0f e8 f2 -[-, %xmm6] v6 = usub_sat v0, v1 ; bin: 66 0f d8 f2 -[-, %xmm6] v7 = avg_round v0, v1 ; bin: 66 0f e0 f2 -[-, %xmm6] v9 = iabs v1 ; bin: 66 0f 38 1c f2 - - return -} - -function %arithmetic_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm5]): -[-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f fd dd -[-, %xmm3] v3 = isub v0, v1 ; bin: 66 0f f9 dd -[-, %xmm3] v4 = imul v0, v1 ; bin: 66 0f d5 dd -[-, %xmm3] v5 = uadd_sat v0, v1 ; bin: 66 0f dd dd -[-, %xmm3] v6 = ssub_sat v0, v1 ; bin: 66 0f e9 dd -[-, %xmm3] v7 = usub_sat v0, v1 ; bin: 66 0f d9 dd -[-, %xmm3] v8 = avg_round v0, v1 ; bin: 66 0f e3 dd -[-, %xmm3] v9 = iabs v1 ; bin: 66 0f 38 1d dd - - return -} - -function %arithmetic_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm0], v1: i32x4 [%xmm1]): -[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1 -[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fa c1 -[-, %xmm0] v4 = imul v0, v1 ; bin: 66 0f 38 40 c1 -[-, %xmm0] v5 = iabs v1 ; bin: 66 0f 38 1e c1 - - return -} - -function %arithmetic_i64x2(i64x2, i64x2) { -block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm2]): -[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f d4 c2 -[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fb c2 - - return -} - -function %arithmetic_i64x2_rex(i64x2, i64x2) { -block0(v0: i64x2 [%xmm8], v1: i64x2 [%xmm10]): -[-, %xmm8] v2 = iadd v0, v1 ; bin: 66 45 0f d4 c2 -[-, %xmm8] v3 = isub v0, v1 ; bin: 66 45 0f fb c2 - - return -} - -function %arithmetic_f32x4(f32x4, f32x4) { -block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm5]): -[-, %xmm3] v2 = fadd v0, v1 ; bin: 0f 58 dd -[-, %xmm3] v3 
= fsub v0, v1 ; bin: 0f 5c dd -[-, %xmm3] v4 = fmul v0, v1 ; bin: 0f 59 dd -[-, %xmm3] v5 = fdiv v0, v1 ; bin: 0f 5e dd -[-, %xmm3] v6 = x86_fmin v0, v1 ; bin: 0f 5d dd -[-, %xmm3] v7 = x86_fmax v0, v1 ; bin: 0f 5f dd -[-, %xmm3] v8 = sqrt v0 ; bin: 0f 51 db - return -} - -function %arithmetic_f32x4_rex(f32x4, f32x4) { -block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm10]): -[-, %xmm3] v2 = fadd v0, v1 ; bin: 41 0f 58 da -[-, %xmm3] v3 = fsub v0, v1 ; bin: 41 0f 5c da -[-, %xmm3] v4 = fmul v0, v1 ; bin: 41 0f 59 da -[-, %xmm3] v5 = fdiv v0, v1 ; bin: 41 0f 5e da -[-, %xmm3] v6 = x86_fmin v0, v1 ; bin: 41 0f 5d da -[-, %xmm3] v7 = x86_fmax v0, v1 ; bin: 41 0f 5f da -[-, %xmm3] v8 = sqrt v1 ; bin: 41 0f 51 da - return -} - -function %arithmetic_f64x2(f64x2, f64x2) { -block0(v0: f64x2 [%xmm3], v1: f64x2 [%xmm5]): -[-, %xmm3] v2 = fadd v0, v1 ; bin: 66 0f 58 dd -[-, %xmm3] v3 = fsub v0, v1 ; bin: 66 0f 5c dd -[-, %xmm3] v4 = fmul v0, v1 ; bin: 66 0f 59 dd -[-, %xmm3] v5 = fdiv v0, v1 ; bin: 66 0f 5e dd -[-, %xmm3] v6 = x86_fmin v0, v1 ; bin: 66 0f 5d dd -[-, %xmm3] v7 = x86_fmax v0, v1 ; bin: 66 0f 5f dd -[-, %xmm3] v8 = sqrt v0 ; bin: 66 0f 51 db - return -} - -function %arithmetic_f64x2_rex(f64x2, f64x2) { -block0(v0: f64x2 [%xmm11], v1: f64x2 [%xmm13]): -[-, %xmm11] v2 = fadd v0, v1 ; bin: 66 45 0f 58 dd -[-, %xmm11] v3 = fsub v0, v1 ; bin: 66 45 0f 5c dd -[-, %xmm11] v4 = fmul v0, v1 ; bin: 66 45 0f 59 dd -[-, %xmm11] v5 = fdiv v0, v1 ; bin: 66 45 0f 5e dd -[-, %xmm11] v6 = x86_fmin v0, v1 ; bin: 66 45 0f 5d dd -[-, %xmm11] v7 = x86_fmax v0, v1 ; bin: 66 45 0f 5f dd -[-, %xmm11] v8 = sqrt v0 ; bin: 66 45 0f 51 db - return -} - -function %pmuludq(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm5]): -[-, %xmm3] v2 = x86_pmuludq v0, v1 ; bin: 66 0f f4 dd - return v2 -} - -function %pmaddwd(i16x8, i16x8) -> i32x4 { -block0(v0: i16x8 [%xmm8], v1: i16x8 [%xmm9]): -[-, %xmm8] v2 = widening_pairwise_dot_product_s v0, v1 ; bin: 66 45 0f f5 c1 - return v2 -} diff 
--git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif deleted file mode 100644 index 74bc68ee67..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif +++ /dev/null @@ -1,117 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %ineg_i32x4() -> b1 { -; check: const0 = 0x00000001000000010000000100000001 -; nextln: const1 = 0x00000000000000000000000000000000 -block0: - v0 = vconst.i32x4 [1 1 1 1] - v2 = ineg v0 - ; check: v5 = vconst.i32x4 const1 - ; nextln: v2 = isub v5, v0 - - v3 = extractlane v2, 0 - v4 = icmp_imm eq v3, -1 - - return v4 -} - -function %ineg_legalized() { -; check: const0 = 0x00000000000000000000000000000000 -block0: - v0 = vconst.i8x16 0x00 - v1 = ineg v0 - ; check: v6 = vconst.i8x16 const0 - ; nextln: v1 = isub v6, v0 - - v2 = raw_bitcast.i16x8 v0 - v3 = ineg v2 - ; check: v7 = vconst.i16x8 const0 - ; nextln: v3 = isub v7, v2 - - v4 = raw_bitcast.i64x2 v0 - v5 = ineg v4 - ; check: v8 = vconst.i64x2 const0 - ; nextln: v5 = isub v8, v4 - - return -} - -function %fneg_legalized() { -; check: const2 = 0xffffffffffffffffffffffffffffffff -block0: - v0 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0] - v1 = fneg v0 - ; check: v4 = vconst.i32x4 const2 - ; nextln: v5 = ishl_imm v4, 31 - ; nextln: v6 = raw_bitcast.f32x4 v5 - ; nextln: v1 = bxor v0, v6 - - v2 = vconst.f64x2 [0x1.0 0x2.0] - v3 = fneg v2 - ; check: v7 = vconst.i64x2 const2 - ; nextln: v8 = ishl_imm v7, 63 - ; nextln: v9 = raw_bitcast.f64x2 v8 - ; nextln: v3 = bxor v2, v9 - - return -} - -function %fabs_legalized() { -; check: const1 = 0xffffffffffffffffffffffffffffffff -block0: - v0 = vconst.f64x2 [0x1.0 -0x2.0] - v1 = fabs v0 - ; check: v2 = vconst.i64x2 const1 - ; nextln: v3 = ushr_imm v2, 1 - ; nextln: v4 = raw_bitcast.f64x2 v3 - ; nextln: v1 = band v0, v4 - return -} - -function %imul_i64x2(i64x2, i64x2) { -block0(v0:i64x2, v1:i64x2): - v2 = 
imul v0, v1 - ; check: v3 = ushr_imm v0, 32 - ; nextln: v4 = x86_pmuludq v3, v1 - ; nextln: v5 = ushr_imm v1, 32 - ; nextln: v6 = x86_pmuludq v5, v0 - ; nextln: v7 = iadd v4, v6 - ; nextln: v8 = ishl_imm v7, 32 - ; nextln: v9 = x86_pmuludq v0, v1 - ; nextln: v2 = iadd v9, v8 - return -} - -function %fmin_f32x4(f32x4, f32x4) { -block0(v0:f32x4, v1:f32x4): - v2 = fmin v0, v1 - ; check: v3 = x86_fmin v0, v1 - ; nextln: v4 = x86_fmin v1, v0 - ; nextln: v5 = bor v4, v3 - ; nextln: v6 = fcmp uno v3, v5 - ; nextln: v7 = raw_bitcast.f32x4 v6 - ; nextln: v8 = bor v5, v7 - ; nextln: v9 = raw_bitcast.i32x4 v7 - ; nextln: v10 = ushr_imm v9, 10 - ; nextln: v11 = raw_bitcast.f32x4 v10 - ; nextln: v2 = band_not v8, v11 - return -} - -function %fmax_f64x2(f64x2, f64x2) { -block0(v0:f64x2, v1:f64x2): - v2 = fmax v0, v1 - ; check: v3 = x86_fmax v0, v1 - ; nextln: v4 = x86_fmax v1, v0 - ; nextln: v5 = bxor v3, v4 - ; nextln: v6 = bor v4, v5 - ; nextln: v7 = fsub v6, v5 - ; nextln: v8 = fcmp uno v5, v7 - ; nextln: v9 = raw_bitcast.i64x2 v8 - ; nextln: v10 = ushr_imm v9, 13 - ; nextln: v11 = raw_bitcast.f64x2 v10 - ; nextln: v2 = band_not v7, v11 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif deleted file mode 100644 index 0daf064713..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif +++ /dev/null @@ -1,17 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake has_avx512dq=true - -function %imul_i64x2() { -block0: - [-, %xmm1] v0 = vconst.i64x2 [1 2] - [-, %xmm2] v1 = vconst.i64x2 [2 2] - [-, %xmm14] v2 = x86_pmullq v0, v1 ; bin: 62 72 f5 08 40 f2 - ; 62, mandatory EVEX prefix - ; 72 = 0111 0010, R is set (MSB in %xmm14) while X, B, and R' are unset (note these are all inverted); mm is set to 0F38 - ; f5 = 1111 0101, W is set (64-bit op), vvvv set to 1 (inverted), bit 2 always set, pp set to 01 - ; 08 = 
0000 1000, everything, LL' indicates 128-bit, V' is unset (inverted, %xmm1 has MSB of 0) - ; 40, opcode (correct) - ; f2 = 1111 0010, ModR/M byte using 0b110 from %xmm14 in reg and 0b010 from %xmm2 in r/m - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif deleted file mode 100644 index 294902d45b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif +++ /dev/null @@ -1,10 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake has_avx512dq=true - -function %imul_i64x2(i64x2, i64x2) { -block0(v0:i64x2, v1:i64x2): - v2 = imul v0, v1 - ; check: v2 = x86_pmullq v0, v1 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif deleted file mode 100644 index 6f235e6b3b..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif +++ /dev/null @@ -1,9 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy has_avx512vl=true - -function %fcvt_from_uint(i32x4) { -block0(v0: i32x4 [%xmm2]): -[-, %xmm6] v1 = x86_vcvtudq2ps v0 ; bin: 62 f1 7f 08 7a f2 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif deleted file mode 100644 index cdadd3254d..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif +++ /dev/null @@ -1,10 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake has_avx512f=true - -function %fcvt_from_uint(i32x4) -> f32x4 { -block0(v0:i32x4): - v1 = fcvt_from_uint.f32x4 v0 - ; check: v1 = x86_vcvtudq2ps v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif 
deleted file mode 100644 index 3131a8aa0c..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif +++ /dev/null @@ -1,99 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %ishl_i16x8(i16x8, i64x2) -> i16x8 { -block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): -[-, %xmm2] v2 = x86_psll v0, v1 ; bin: 66 0f f1 d1 - return v2 -} - -function %ishl_i32x4(i32x4, i64x2) -> i32x4 { -block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): -[-, %xmm4] v2 = x86_psll v0, v1 ; bin: 66 0f f2 e0 - return v2 -} - -function %ishl_i64x2(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]): -[-, %xmm6] v2 = x86_psll v0, v1 ; bin: 66 0f f3 f3 - return v2 -} - -function %ushr_i16x8(i16x8, i64x2) -> i16x8 { -block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): -[-, %xmm2] v2 = x86_psrl v0, v1 ; bin: 66 0f d1 d1 - return v2 -} - -function %ushr_i32x4(i32x4, i64x2) -> i32x4 { -block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): -[-, %xmm4] v2 = x86_psrl v0, v1 ; bin: 66 0f d2 e0 - return v2 -} - -function %ushr_i64x2(i64x2, i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]): -[-, %xmm6] v2 = x86_psrl v0, v1 ; bin: 66 0f d3 f3 - return v2 -} - -function %sshr_i16x8(i16x8, i64x2) -> i16x8 { -block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]): -[-, %xmm2] v2 = x86_psra v0, v1 ; bin: 66 0f e1 d1 - return v2 -} - -function %sshr_i32x4(i32x4, i64x2) -> i32x4 { -block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]): -[-, %xmm4] v2 = x86_psra v0, v1 ; bin: 66 0f e2 e0 - return v2 -} - -function %ishl_imm_i16x8(i16x8) -> i16x8 { -block0(v0: i16x8 [%xmm2]): -[-, %xmm2] v2 = ishl_imm v0, 3 ; bin: 66 0f 71 f2 03 - return v2 -} - -function %ishl_imm_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4 [%xmm4]): -[-, %xmm4] v2 = ishl_imm v0, 10 ; bin: 66 0f 72 f4 0a - return v2 -} - -function %ishl_imm_i64x2(i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6]): -[-, %xmm6] v2 = ishl_imm v0, 42 ; bin: 66 0f 73 f6 2a - return v2 -} - -function %ushr_imm_i16x8(i16x8) -> i16x8 { 
-block0(v0: i16x8 [%xmm2]): -[-, %xmm2] v2 = ushr_imm v0, 3 ; bin: 66 0f 71 d2 03 - return v2 -} - -function %ushr_imm_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4 [%xmm4]): -[-, %xmm4] v2 = ushr_imm v0, 10 ; bin: 66 0f 72 d4 0a - return v2 -} - -function %ushr_imm_i64x2(i64x2) -> i64x2 { -block0(v0: i64x2 [%xmm6]): -[-, %xmm6] v2 = ushr_imm v0, 42 ; bin: 66 0f 73 d6 2a - return v2 -} - -function %sshr_imm_i16x8(i16x8) -> i16x8 { -block0(v0: i16x8 [%xmm2]): -[-, %xmm2] v2 = sshr_imm v0, 3 ; bin: 66 0f 71 e2 03 - return v2 -} - -function %sshr_imm_i32x4(i32x4) -> i32x4 { -block0(v0: i32x4 [%xmm4]): -[-, %xmm4] v2 = sshr_imm v0, 10 ; bin: 66 0f 72 e4 0a - return v2 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif deleted file mode 100644 index 7674f83e01..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif +++ /dev/null @@ -1,111 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %ushr_i8x16() -> i8x16 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = ushr v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v4 = raw_bitcast.i16x8 v1 - ; nextln: v5 = x86_psrl v4, v3 - ; nextln: v6 = raw_bitcast.i8x16 v5 - ; nextln: v7 = const_addr.i64 const1 - ; nextln: v8 = ishl_imm v0, 4 - ; nextln: v9 = load_complex.i8x16 v7+v8 - ; nextln: v2 = band v6, v9 - return v2 -} - -function %sshr_i8x16() -> i8x16 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = sshr v1, v0 - ; check: v3 = iadd_imm v0, 8 - ; nextln: v4 = bitcast.i64x2 v3 - - ; nextln: v5 = x86_punpckl v1, v1 - ; nextln: v6 = raw_bitcast.i16x8 v5 - ; nextln: v7 = x86_psra v6, v4 - - ; nextln: v8 = x86_punpckh v1, v1 - ; nextln: v9 = raw_bitcast.i16x8 v8 - ; nextln: v10 = x86_psra v9, v4 - - ; nextln: v2 = snarrow v7, v10 - return v2 -} - -function %ishl_i8x16() -> i8x16 { 
-block0: - v0 = iconst.i32 1 - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = ishl v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v4 = raw_bitcast.i16x8 v1 - ; nextln: v5 = x86_psll v4, v3 - ; nextln: v6 = raw_bitcast.i8x16 v5 - ; nextln: v7 = const_addr.i64 const1 - ; nextln: v8 = ishl_imm v0, 4 - ; nextln: v9 = load_complex.i8x16 v7+v8 - ; nextln: v2 = band v6, v9 - return v2 -} - -function %ishl_i32x4() -> i32x4 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i32x4 [1 2 4 8] - v2 = ishl v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v2 = x86_psll v1, v3 - return v2 -} - -function %ushr_i64x2() -> i64x2 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i64x2 [1 2] - v2 = ushr v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v2 = x86_psrl v1, v3 - return v2 -} - -function %sshr_i16x8() -> i16x8 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i16x8 [1 2 4 8 16 32 64 128] - v2 = sshr v1, v0 - ; check: v3 = bitcast.i64x2 v0 - ; nextln: v2 = x86_psra v1, v3 - return v2 -} - -function %sshr_i64x2() -> i64x2 { -block0: - v0 = iconst.i32 1 - v1 = vconst.i64x2 [1 2] - v2 = sshr v1, v0 - ; check: v3 = x86_pextr v1, 0 - ; nextln: v4 = sshr v3, v0 - ; nextln: v5 = x86_pinsr v1, v4, 0 - ; nextln: v6 = x86_pextr v1, 1 - ; nextln: v7 = sshr v6, v0 - ; nextln: v2 = x86_pinsr v5, v7, 1 - return v2 -} - -function %bitselect_i16x8() -> i16x8 { -block0: - v0 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v1 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v2 = vconst.i16x8 [0 0 0 0 0 0 0 0] - v3 = bitselect v0, v1, v2 - ; check: v4 = band v1, v0 - ; nextln: v5 = band_not v2, v0 - ; nextln: v3 = bor v4, v5 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif deleted file mode 100644 index 1d3db4a119..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif +++ /dev/null @@ -1,138 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - 
-function %icmp_i8x16() { -block0: -[-, %xmm3] v0 = vconst.i8x16 0x00 ; bin: 66 0f ef db -[-, %xmm4] v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 e4 -[-, %xmm3] v2 = icmp eq v0, v1 ; bin: 66 0f 74 dc - return -} - -function %icmp_i16x8_rex() { -block0: -[-, %xmm0] v0 = vconst.i16x8 0x00 -[-, %xmm15] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 41 0f 75 c7 - return -} - -function %icmp_i32x4() { -block0: -[-, %xmm0] v0 = vconst.i32x4 0x00 -[-, %xmm4] v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff -[-, %xmm0] v2 = icmp eq v0, v1 ; bin: 66 0f 76 c4 - return -} - -function %icmp_i64x2_rex() { -block0: -[-, %xmm8] v0 = vconst.i64x2 0x00 -[-, %xmm1] v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff -[-, %xmm8] v2 = icmp eq v0, v1 ; bin: 66 44 0f 38 29 c1 - return -} - -function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 { -block0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]): -[-, %xmm2] v2 = icmp sgt v0, v1 ; bin: 66 0f 64 d1 - return v2 -} - -function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 { -block0(v0: i16x8 [%xmm4], v1: i16x8 [%xmm3]): -[-, %xmm4] v2 = icmp sgt v0, v1 ; bin: 66 0f 65 e3 - return v2 -} - -function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 { -block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm5]): -[-, %xmm6] v2 = icmp sgt v0, v1 ; bin: 66 0f 66 f5 - return v2 -} - -function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 { -block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]): -[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7 - return v2 -} - -function %min_max_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm1]): -[-, %xmm3] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3c d9 -[-, %xmm3] v3 = x86_pmaxu v0, v1 ; bin: 66 0f de d9 -[-, %xmm3] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 38 d9 -[-, %xmm3] v5 = x86_pminu v0, v1 ; bin: 66 0f da d9 - return -} - -function %min_max_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm2], v1: i16x8 [%xmm5]): -[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f ee d5 -[-, %xmm2] 
v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3e d5 -[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f ea d5 -[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3a d5 - return -} - -function %min_max_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]): -[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3d d4 -[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3f d4 -[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 39 d4 -[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3b d4 - return -} - -function %fcmp_f32x4(f32x4, f32x4) { -block0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]): -[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 0f c2 d4 00 -[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 0f c2 d4 01 -[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 0f c2 d4 02 -[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 0f c2 d4 03 -[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 0f c2 d4 04 -[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 0f c2 d4 05 -[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 0f c2 d4 06 -[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 0f c2 d4 07 - return -} - -function %fcmp_f32x4_rex(f32x4, f32x4) { -block0(v0: f32x4 [%xmm8], v1: f32x4 [%xmm8]): -[-, %xmm8] v2 = fcmp eq v0, v1 ; bin: 45 0f c2 c0 00 -[-, %xmm8] v3 = fcmp lt v0, v1 ; bin: 45 0f c2 c0 01 -[-, %xmm8] v4 = fcmp le v0, v1 ; bin: 45 0f c2 c0 02 -[-, %xmm8] v5 = fcmp uno v0, v1 ; bin: 45 0f c2 c0 03 -[-, %xmm8] v6 = fcmp ne v0, v1 ; bin: 45 0f c2 c0 04 -[-, %xmm8] v7 = fcmp uge v0, v1 ; bin: 45 0f c2 c0 05 -[-, %xmm8] v8 = fcmp ugt v0, v1 ; bin: 45 0f c2 c0 06 -[-, %xmm8] v9 = fcmp ord v0, v1 ; bin: 45 0f c2 c0 07 - return -} - -function %fcmp_f64x2(f64x2, f64x2) { -block0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]): -[-, %xmm2] v2 = fcmp eq v0, v1 ; bin: 66 0f c2 d0 00 -[-, %xmm2] v3 = fcmp lt v0, v1 ; bin: 66 0f c2 d0 01 -[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 0f c2 d0 02 -[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 0f c2 d0 03 -[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 0f c2 d0 04 -[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 0f c2 d0 05 -[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 0f 
c2 d0 06 -[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 0f c2 d0 07 - return -} - -function %fcmp_f64x2_rex(f64x2, f64x2) { -block0(v0: f64x2 [%xmm9], v1: f64x2 [%xmm11]): -[-, %xmm9] v2 = fcmp eq v0, v1 ; bin: 66 45 0f c2 cb 00 -[-, %xmm9] v3 = fcmp lt v0, v1 ; bin: 66 45 0f c2 cb 01 -[-, %xmm9] v4 = fcmp le v0, v1 ; bin: 66 45 0f c2 cb 02 -[-, %xmm9] v5 = fcmp uno v0, v1 ; bin: 66 45 0f c2 cb 03 -[-, %xmm9] v6 = fcmp ne v0, v1 ; bin: 66 45 0f c2 cb 04 -[-, %xmm9] v7 = fcmp uge v0, v1 ; bin: 66 45 0f c2 cb 05 -[-, %xmm9] v8 = fcmp ugt v0, v1 ; bin: 66 45 0f c2 cb 06 -[-, %xmm9] v9 = fcmp ord v0, v1 ; bin: 66 45 0f c2 cb 07 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif deleted file mode 100644 index a6324a34cc..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif +++ /dev/null @@ -1,40 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 { -; check: const0 = 0xffffffffffffffffffffffffffffffff -block0(v0: i32x4, v1: i32x4): - v2 = icmp ne v0, v1 - ; check: v3 = icmp eq v0, v1 - ; nextln: v4 = vconst.b32x4 const0 - ; nextln: v2 = bxor v4, v3 - return v2 -} - -function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 { -; check: const0 = 0xffffffffffffffffffffffffffffffff -block0(v0: i32x4, v1: i32x4): - v2 = icmp ugt v0, v1 - ; check: v3 = x86_pmaxu v0, v1 - ; nextln: v4 = icmp eq v3, v1 - ; nextln: v5 = vconst.b32x4 const0 - ; nextln: v2 = bxor v5, v4 - return v2 -} - -function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 { -block0(v0: i16x8, v1: i16x8): - v2 = icmp sge v0, v1 - ; check: v3 = x86_pmins v0, v1 - ; nextln: v2 = icmp eq v3, v1 - return v2 -} - -function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 { -block0(v0: i8x16, v1: i8x16): - v2 = icmp uge v0, v1 - ; check: v3 = x86_pminu v0, v1 - ; nextln: v2 = icmp eq v3, v1 - return v2 -} diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif deleted file mode 100644 index f26b436931..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif +++ /dev/null @@ -1,26 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy nehalem - -; Ensure raw_bitcast emits no instructions. -function %raw_bitcast_i16x8_to_b32x4() { -block0: -[-, %rbx] v0 = bconst.b16 true -[-, %xmm2] v1 = scalar_to_vector.b16x8 v0 -[-, %xmm2] v2 = raw_bitcast.i32x4 v1 ; bin: - return -} - -function %conversions_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]): -[-, %xmm2] v2 = fcvt_from_sint.f32x4 v0 ; bin: 40 0f 5b d6 -[-, %xmm6] v3 = x86_palignr v0, v1, 3 ; bin: 66 0f 3a 0f f4 03 - return -} - -function %conversions_i16x8(i16x8) { -block0(v0: i16x8 [%xmm6]): -[-, %xmm2] v1 = swiden_low v0 ; bin: 66 0f 38 23 d6 -[-, %xmm11] v2 = uwiden_low v0 ; bin: 66 44 0f 38 33 de - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif deleted file mode 100644 index 6de14e181a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif +++ /dev/null @@ -1,70 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %fcvt_from_uint(i32x4) -> f32x4 { -block0(v0:i32x4): - v1 = fcvt_from_uint.f32x4 v0 - ; check: v2 = raw_bitcast.i16x8 v0 - ; nextln: v3 = vconst.i16x8 const0 - ; nextln: v4 = x86_pblendw v3, v2, 85 - ; nextln: v5 = raw_bitcast.i32x4 v4 - ; nextln: v6 = isub v0, v5 - ; nextln: v7 = fcvt_from_sint.f32x4 v5 - ; nextln: v8 = ushr_imm v6, 1 - ; nextln: v9 = fcvt_from_sint.f32x4 v8 - ; nextln: v10 = fadd v9, v9 - ; nextln: v1 = fadd v10, v7 - return v1 -} - -function %fcvt_to_sint_sat(f32x4) -> i32x4 { -block0(v0:f32x4): - v1 = fcvt_to_sint_sat.i32x4 v0 - ; check: v2 = fcmp eq v0, v0 - ; nextln: v3 = 
raw_bitcast.f32x4 v2 - ; nextln: v4 = band v0, v3 - ; nextln: v5 = bxor v3, v0 - ; nextln: v6 = raw_bitcast.i32x4 v5 - ; nextln: v7 = x86_cvtt2si.i32x4 v4 - ; nextln: v8 = band v6, v7 - ; nextln: v9 = sshr_imm v8, 31 - ; nextln: v1 = bxor v7, v9 - return v1 -} - -function %fcvt_to_uint_sat(f32x4) -> i32x4 { -; check: const0 = 0x00000000000000000000000000000000 -; nextln: const1 = 0x4f0000004f0000004f0000004f000000 -block0(v0:f32x4): - v1 = fcvt_to_uint_sat.i32x4 v0 - ; check: v2 = vconst.f32x4 const0 - ; nextln: v3 = vconst.f32x4 const1 - ; nextln: v4 = x86_fmax v0, v2 - ; nextln: v5 = fsub v4, v3 - ; nextln: v6 = fcmp le v3, v5 - ; nextln: v7 = x86_cvtt2si.i32x4 v5 - ; nextln: v8 = raw_bitcast.i32x4 v6 - ; nextln: v9 = bxor v7, v8 - ; nextln: v10 = raw_bitcast.i32x4 v2 - ; nextln: v11 = x86_pmaxs v9, v10 - ; nextln: v12 = x86_cvtt2si.i32x4 v4 - ; nextln: v1 = iadd v12, v11 - return v1 -} - -function %uwiden_high(i8x16) -> i16x8 { -block0(v0: i8x16): - v1 = uwiden_high v0 - ; check: v2 = x86_palignr v0, v0, 8 - ; nextln: v1 = uwiden_low v2 - return v1 -} - -function %swiden_high(i16x8) -> i32x4 { -block0(v0: i16x8): - v1 = swiden_high v0 - ; check: v2 = x86_palignr v0, v0, 8 - ; nextln: v1 = swiden_low v2 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif deleted file mode 100644 index 6240a08557..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif +++ /dev/null @@ -1,34 +0,0 @@ -test binemit -set opt_level=speed_and_size -set enable_simd -target x86_64 legacy - -;; These scalar_to_vector tests avoid the use of REX prefixes with the speed_and_size optimization flag. 
- -function %scalar_to_vector_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %xmm0] v1 = scalar_to_vector.b8x16 v0 ; bin: 66 0f 6e c0 - return -} - -function %scalar_to_vector_i16() { -block0: -[-, %rbx] v0 = iconst.i16 42 -[-, %xmm2] v1 = scalar_to_vector.i16x8 v0 ; bin: 66 0f 6e d3 - return -} - -function %scalar_to_vector_b32() { -block0: -[-, %rcx] v0 = bconst.b32 false -[-, %xmm3] v1 = scalar_to_vector.b32x4 v0 ; bin: 66 0f 6e d9 - return -} - -function %scalar_to_vector_i64() { -block0: -[-, %rdx] v0 = iconst.i64 42 -[-, %xmm7] v1 = scalar_to_vector.i64x2 v0 ; bin: 66 48 0f 6e fa - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif deleted file mode 100644 index a8c14a6342..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif +++ /dev/null @@ -1,126 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy haswell - -; for insertlane, floats are legalized differently than integers and booleans; integers and -; booleans use x86_pinsr which is manually placed in the IR so that it can be binemit-tested - -function %insertlane_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %rbx] v1 = bconst.b8 false -[-, %xmm0] v2 = splat.b8x16 v0 -[-, %xmm0] v3 = x86_pinsr v2, v1, 10 ; bin: 66 0f 3a 20 c3 0a - return -} - -function %insertlane_i16() { -block0: -[-, %rax] v0 = iconst.i16 4 -[-, %rbx] v1 = iconst.i16 5 -[-, %xmm1] v2 = splat.i16x8 v0 -[-, %xmm1] v3 = x86_pinsr v2, v1, 4 ; bin: 66 0f c4 cb 04 - return -} - -function %insertlane_i32() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %rbx] v1 = iconst.i32 99 -[-, %xmm4] v2 = splat.i32x4 v0 -[-, %xmm4] v3 = x86_pinsr v2, v1, 2 ; bin: 66 0f 3a 22 e3 02 - return -} - -function %insertlane_b64() { -block0: -[-, %rax] v0 = bconst.b64 true -[-, %rbx] v1 = bconst.b64 false -[-, %xmm2] v2 = splat.b64x2 v0 -[-, %xmm2] v3 = x86_pinsr v2, v1, 1 ; bin: 66 48 0f 3a 22 d3 01 - 
return -} - -; for extractlane, floats are legalized differently than integers and booleans; integers and -; booleans use x86_pextr which is manually placed in the IR so that it can be binemit-tested - -function %extractlane_b8() { -block0: -[-, %rax] v0 = bconst.b8 true -[-, %xmm0] v1 = splat.b8x16 v0 -[-, %rax] v2 = x86_pextr v1, 10 ; bin: 66 0f 3a 14 c0 0a - return -} - -function %extractlane_i16() { -block0: -[-, %rax] v0 = iconst.i16 4 -[-, %xmm1] v1 = splat.i16x8 v0 -[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04 - return -} - -function %extractlane_i32() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %xmm4] v1 = splat.i32x4 v0 -[-, %rcx] v2 = x86_pextr v1, 2 ; bin: 66 0f 3a 16 e1 02 - return -} - -function %extractlane_b64() { -block0: -[-, %rax] v0 = bconst.b64 false -[-, %xmm2] v1 = splat.b64x2 v0 -[-, %rbx] v2 = x86_pextr v1, 1 ; bin: 66 48 0f 3a 16 d3 01 - return -} - -;; shuffle - -function %pshufd() { -block0: -[-, %rax] v0 = iconst.i32 42 -[-, %xmm0] v1 = scalar_to_vector.i32x4 v0 ; bin: 66 0f 6e c0 -[-, %xmm0] v2 = x86_pshufd v1, 0 ; bin: 66 0f 70 c0 00 - return -} - -function %pshufb() { -block0: -[-, %rax] v0 = iconst.i8 42 -[-, %xmm0] v1 = scalar_to_vector.i8x16 v0 ; bin: 66 0f 6e c0 -[-, %rbx] v2 = iconst.i8 43 -[-, %xmm12] v3 = scalar_to_vector.i8x16 v2 ; bin: 66 44 0f 6e e3 -[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 41 0f 38 00 c4 - return -} - -;; blend - -function %pblendw(b16x8, b16x8) { -block0(v0: b16x8 [%xmm10], v1: b16x8 [%xmm2]): -[-, %xmm10] v2 = x86_pblendw v0, v1, 0x55 ; bin: 66 44 0f 3a 0e d2 55 - return -} - -;; pack/unpack - -function %unpack_high_i8x16(i8x16, i8x16) { -block0(v0: i8x16 [%xmm0], v1: i8x16 [%xmm12]): -[-, %xmm0] v2 = x86_punpckh v0, v1 ; bin: 66 41 0f 68 c4 - return -} - -function %unpack_low_i32x4(i32x4, i32x4) { -block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]): -[-, %xmm7] v2 = x86_punpckl v0, v1 ; bin: 66 0f 62 fe - return -} - -function %narrowing_i16x8(i16x8, i16x8) { -block0(v0: i16x8 [%xmm7], v1: 
i16x8 [%xmm8]): -[-, %xmm7] v2 = snarrow v0, v1 ; bin: 66 41 0f 63 f8 -[-, %xmm7] v3 = unarrow v0, v1 ; bin: 66 41 0f 67 f8 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif deleted file mode 100644 index 91ff8eb9a0..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -set opt_level=speed_and_size -set enable_probestack=false -set enable_simd -target x86_64 legacy - -; Ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register) -function %scalar_to_vector_f32() -> f32x4 baldrdash_system_v { -block0: - v0 = f32const 0x0.42 - v1 = scalar_to_vector.f32x4 v0 - return v1 -} - -; check: block0 -; nextln: v2 = iconst.i32 0x3e84_0000 -; nextln: v0 = bitcast.f32 v2 -; nextln: [null_fpr#00,%xmm0] v1 = scalar_to_vector.f32x4 v0 -; nextln: return v1 diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif deleted file mode 100644 index 284ef35180..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif +++ /dev/null @@ -1,101 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -;; shuffle - -function %shuffle_different_ssa_values() -> i8x16 { -; check: const2 = 0x80000000000000000000000000000000 -; nextln: const3 = 0x01808080808080808080808080808080 -block0: - v0 = vconst.i8x16 0x00 - v1 = vconst.i8x16 0x01 - v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0 - return v2 -} -; check: v1 = vconst.i8x16 const1 -; nextln: v3 = vconst.i8x16 const2 -; nextln: v4 = x86_pshufb v0, v3 -; nextln: v5 = vconst.i8x16 const3 -; nextln: v6 = x86_pshufb v1, v5 -; nextln: v2 = bor v4, v6 - -function %shuffle_same_ssa_value() -> i8x16 { -; check: const1 = 
0x03000000000000000000000000000000 -block0: - v1 = vconst.i8x16 0x01 - v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1 - return v2 -} -; check: v1 = vconst.i8x16 const0 -; nextln: v3 = vconst.i8x16 const1 -; nextln: v2 = x86_pshufb v1, v3 - -;; splat - -function %splat_i32() -> i32x4 { -block0: - v0 = iconst.i32 42 - v1 = splat.i32x4 v0 - return v1 -} -; check: block0: -; nextln: v0 = iconst.i32 42 -; nextln: v2 = scalar_to_vector.i32x4 v0 -; nextln: v1 = x86_pshufd v2, 0 -; nextln: return v1 -; nextln: } - -function %splat_i64() -> i64x2 { -block0: - v0 = iconst.i64 42 - v1 = splat.i64x2 v0 - return v1 -} -; check: block0: -; nextln: v0 = iconst.i64 42 -; nextln: v2 = scalar_to_vector.i64x2 v0 -; nextln: v1 = x86_pinsr v2, v0, 1 -; nextln: return v1 - -function %splat_b16() -> b16x8 { -block0: - v0 = bconst.b16 true - v1 = splat.b16x8 v0 - return v1 -} -; check: block0: -; nextln: v0 = bconst.b16 true -; nextln: v2 = scalar_to_vector.b16x8 v0 -; nextln: v3 = x86_pinsr v2, v0, 1 -; nextln: v4 = raw_bitcast.i32x4 v3 -; nextln: v5 = x86_pshufd v4, 0 -; nextln: v1 = raw_bitcast.b16x8 v5 -; nextln: return v1 - -function %splat_i8() -> i8x16 { -; check: const0 = 0x00000000000000000000000000000000 -block0: - v0 = iconst.i8 42 - v1 = splat.i8x16 v0 - return v1 -} -; check: block0: -; nextln: v2 = iconst.i32 42 -; nextln: v0 = ireduce.i8 v2 -; nextln: v3 = scalar_to_vector.i8x16 v0 -; nextln: v4 = vconst.i8x16 const0 -; nextln: v1 = x86_pshufb v3, v4 -; nextln: return v1 - -function %swizzle() -> i8x16 { -; check: const1 = 0x70707070707070707070707070707070 -block0: - v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] - v2 = swizzle.i8x16 v0, v1 - ; check: v3 = vconst.i8x16 const1 - ; nextln: v4 = uadd_sat v1, v3 - ; nextln: v2 = x86_pshufb v0, v4 - return v2 -} diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif deleted file mode 100644 index af5ca0fe63..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif +++ /dev/null @@ -1,33 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %bor_b16x8(b16x8, b16x8) -> b16x8 { -block0(v0: b16x8 [%xmm2], v1: b16x8 [%xmm1]): -[-, %xmm2] v2 = bor v0, v1 ; bin: 66 0f eb d1 - return v2 -} - -function %band_b64x2(b64x2, b64x2) -> b64x2 { -block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]): -[-, %xmm6] v2 = band v0, v1 ; bin: 66 0f db f3 - return v2 -} - -function %bxor_b32x4(b32x4, b32x4) -> b32x4 { -block0(v0: b32x4 [%xmm4], v1: b32x4 [%xmm0]): -[-, %xmm4] v2 = bxor v0, v1 ; bin: 66 0f ef e0 - return v2 -} - -function %band_not_b64x2(b64x2, b64x2) -> b64x2 { -block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]): -[-, %xmm3] v2 = band_not v0, v1 ; bin: 66 0f df de - return v2 -} - -function %x86_ptest_f64x2(f64x2, f64x2) { -block0(v0: f64x2 [%xmm0], v1: f64x2 [%xmm2]): -[-, %rflags] v2 = x86_ptest v0, v1 ; bin: 66 0f 38 17 c2 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif deleted file mode 100644 index 5e5bb7ac43..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif +++ /dev/null @@ -1,31 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy skylake - -function %bnot_b32x4(b32x4) -> b32x4 { -; check: const0 = 0xffffffffffffffffffffffffffffffff -block0(v0: b32x4): - v1 = bnot v0 - ; check: v2 = vconst.b32x4 const0 - ; nextln: v1 = bxor v2, v0 - return v1 -} - -function %vany_true_b32x4(b32x4) -> b1 { -block0(v0: b32x4): - v1 = vany_true v0 - ; check: v2 = x86_ptest v0, v0 - ; nextln: v1 = trueif ne v2 - return v1 -} - -function %vall_true_i64x2(i64x2) -> b1 { -; check: const0 = 0x00000000000000000000000000000000 -block0(v0: 
i64x2): - v1 = vall_true v0 - ; check: v2 = vconst.i64x2 const0 - ; nextln: v3 = icmp eq v0, v2 - ; nextln: v4 = x86_ptest v3, v3 - ; nextln: v1 = trueif eq v4 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif deleted file mode 100644 index 6b6b91a915..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif +++ /dev/null @@ -1,11 +0,0 @@ -test rodata -set enable_simd -target x86_64 legacy skylake - -function %bnot_b32x4(b32x4) -> b32x4 { -block0(v0: b32x4): - v1 = bnot v0 - return v1 -} - -; sameln: [FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF] diff --git a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif deleted file mode 100644 index 4f8b050d01..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif +++ /dev/null @@ -1,85 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy skylake - -function %load_store_simple(i64) { -block0(v0: i64 [%rax]): -[-, %xmm0] v10 = load.i32x4 v0 ; bin: heap_oob 0f 10 00 -[-] store v10, v0 ; bin: heap_oob 0f 11 00 - - ; use REX prefix -[-, %xmm8] v12 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00 -[-] store v12, v0 ; bin: heap_oob 44 0f 11 00 - - return -} - -function %load_store_with_displacement(i64) { -block0(v0: i64 [%rax]): - ; use 8-bit displacement -[-, %xmm0] v1 = load.f32x4 v0+42 ; bin: heap_oob 0f 10 40 2a -[-] store v1, v0+42 ; bin: heap_oob 0f 11 40 2a - - ; use 8-bit displacement with REX prefix -[-, %xmm8] v2 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00 -[-] store v2, v0 ; bin: heap_oob 44 0f 11 00 - - ; use 32-bit displacement -[-, %xmm0] v3 = load.f32x4 v0+256 ; bin: heap_oob 0f 10 80 00000100 -[-] store v3, v0+256 ; bin: heap_oob 0f 11 80 00000100 - - ; use 32-bit displacement with REX prefix -[-, %xmm8] v4 = load.f32x4 v0+256 ; bin: heap_oob 44 0f 10 80 00000100 
-[-] store v4, v0+256 ; bin: heap_oob 44 0f 11 80 00000100 - - return -} - -function %load_store_complex(i64, i64) { -block0(v0: i64 [%rax], v1: i64 [%rbx]): - ; %xmm1 corresponds to ModR/M 0x04; the 0b100 in the R/M slot indicates a SIB byte follows - ; %rax and %rbx form the SIB 0x18 -[-, %xmm1] v10 = load_complex.f64x2 v0+v1 ; bin: heap_oob 40 0f 10 0c 18 - ; enabling bit 6 of the ModR/M byte indicates a disp8 follows -[-] store_complex v10, v0+v1+5 ; bin: heap_oob 40 0f 11 4c 18 05 - - return -} - -function %copy_to_ssa() { -block0: -[-, %xmm1] v0 = copy_to_ssa.i64x2 %xmm3 ; bin: 40 0f 28 cb -[-, %xmm2] v1 = copy_to_ssa.i64x2 %xmm15 ; bin: 41 0f 28 d7 - - return -} - -function %uload_extend() { -block0: - [-,%rdx] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%xmm2] v3 = uload8x8 v1+0 ; bin: heap_oob 66 0f 38 30 12 - [-,%xmm2] v4 = uload8x8 v1+20 ; bin: heap_oob 66 0f 38 30 52 14 - [-,%xmm2] v5 = uload8x8 v1+256 ; bin: heap_oob 66 0f 38 30 92 00000100 - [-,%xmm2] v6 = uload16x4 v1+0 ; bin: heap_oob 66 0f 38 33 12 - [-,%xmm2] v7 = uload16x4 v1+20 ; bin: heap_oob 66 0f 38 33 52 14 - [-,%xmm2] v8 = uload16x4 v1+256 ; bin: heap_oob 66 0f 38 33 92 00000100 - [-,%xmm10] v9 = uload32x2 v1+0 ; bin: heap_oob 66 44 0f 38 35 12 - [-,%xmm10] v10 = uload32x2 v1+20 ; bin: heap_oob 66 44 0f 38 35 52 14 - [-,%xmm10] v11 = uload32x2 v1+256 ; bin: heap_oob 66 44 0f 38 35 92 00000100 - return -} - -function %sload_extend() { -block0: - [-,%rdx] v1 = iconst.i64 0x0123_4567_89ab_cdef - [-,%xmm2] v3 = sload8x8 v1+0 ; bin: heap_oob 66 0f 38 20 12 - [-,%xmm2] v4 = sload8x8 v1+20 ; bin: heap_oob 66 0f 38 20 52 14 - [-,%xmm2] v5 = sload8x8 v1+256 ; bin: heap_oob 66 0f 38 20 92 00000100 - [-,%xmm10] v6 = sload16x4 v1+0 ; bin: heap_oob 66 44 0f 38 23 12 - [-,%xmm10] v7 = sload16x4 v1+20 ; bin: heap_oob 66 44 0f 38 23 52 14 - [-,%xmm10] v8 = sload16x4 v1+256 ; bin: heap_oob 66 44 0f 38 23 92 00000100 - [-,%xmm2] v9 = sload32x2 v1+0 ; bin: heap_oob 66 0f 38 25 12 - [-,%xmm2] v10 = sload32x2 
v1+20 ; bin: heap_oob 66 0f 38 25 52 14 - [-,%xmm2] v11 = sload32x2 v1+256 ; bin: heap_oob 66 0f 38 25 92 00000100 - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif deleted file mode 100644 index 4141a05b32..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif +++ /dev/null @@ -1,22 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy haswell - -function u0:0(i64 fp [%rbp]) -> i32 [%rax], i64 fp [%rbp] system_v { - ss0 = explicit_slot 32, offset -48 - ss1 = spill_slot 16, offset -64 - ss2 = incoming_arg 16, offset -16 - sig0 = () system_v - fn0 = colocated u0:2 sig0 - -block0(v5: i64 [%rbp]): -[-] x86_push v5 -[-] copy_special %rsp -> %rbp -[-] adjust_sp_down_imm 48 -[-,%rax] v0 = stack_addr.i64 ss0 -[-,%xmm15] v4 = load.i32x4 v0 -[-,%rax] v2 = x86_pextr v4, 1 ; bin: 66 44 0f 3a 16 f8 01 -[-] adjust_sp_up_imm 48 -[-] v6 = x86_pop.i64 -[-] return v2, v6 -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif deleted file mode 100644 index 23aee87655..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif +++ /dev/null @@ -1,29 +0,0 @@ -test binemit -set opt_level=speed_and_size -set enable_simd -target x86_64 legacy - -function %vconst_b8() { -block0: -[-, %xmm2] v0 = vconst.b8x16 0x01 ; bin: 0f 10 15 00000008 PCRelRodata4(15) -[-, %xmm3] v1 = vconst.b8x16 0x02 ; bin: 0f 10 1d 00000011 PCRelRodata4(31) - return -} - -function %vconst_with_preamble() { -const42 = i32x4 [1 0 0 0] -const43 = i32x4 [2 0 0 0] - -block0: -[-, %xmm2] v0 = vconst.i32x4 const42 ; bin: 0f 10 15 00000008 PCRelRodata4(15) -[-, %xmm3] v1 = vconst.i32x4 const43 ; bin: 0f 10 1d 00000011 PCRelRodata4(31) - return -} - -function %address_of_vconst() { -const42 = i32x4 [1 0 0 0] - -block0: -[-, %rax] v0 = const_addr.i64 const42 ; bin: 48 8d 05 00000001 
PCRelRodata4(8) - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif deleted file mode 100644 index 477984b344..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif +++ /dev/null @@ -1,16 +0,0 @@ -test compile -set enable_simd=true -set enable_probestack=false -target x86_64 legacy haswell - -; use baldrdash calling convention here for simplicity (avoids prologue, epilogue) -function %vconst_i32() -> i32x4 baldrdash_system_v { -block0: - v0 = vconst.i32x4 0x1234 - return v0 -} -; check: const0 = 0x00000000000000000000000000001234 -; check: block0: -; nextln: v0 = vconst.i32x4 const0 -; nextln: return v0 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif deleted file mode 100644 index 07fa364752..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif +++ /dev/null @@ -1,10 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy - -function %vconst_optimizations() { -block0: -[-, %xmm4] v0 = vconst.b8x16 0x00 ; bin: 66 0f ef e4 -[-, %xmm7] v1 = vconst.b8x16 0xffffffffffffffffffffffffffffffff ; bin: 66 0f 74 ff - return -} diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif deleted file mode 100644 index e7e63e65ea..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif +++ /dev/null @@ -1,49 +0,0 @@ -test rodata -set enable_simd=true -target x86_64 legacy haswell - -function %vconst_i32() -> i32x4 { -block0: - v0 = vconst.i32x4 0x1234 - return v0 -} - -; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] - -function %vconst_b16() -> b16x8 { -block0: - v0 = vconst.b16x8 [true false true false true false true true] - return v0 -} - -; sameln: [FF, FF, 0, 0, FF, FF, 0, 
0, FF, FF, 0, 0, FF, FF, FF, FF] - - -; Since both jump tables and constants are emitted after the function body, it is important that they do not interfere. -; This test shows that even in the presence of jump tables, constants are emitted correctly -function %vconst_with_jumptables() { -jt0 = jump_table [block0] - -block10: - v10 = iconst.i64 0 - br_table v10, block1, jt0 -block0: - jump block11 -block1: - jump block11 -block11: - v11 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16] - return -} - -; sameln: [1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, 10] - -function %vconst_preamble() -> b16x8 { -const42 = i32x4 [0 1 2 3] -const43 = i32x4 [4 5 6 7] -block0: - v0 = vconst.b16x8 const42 - return v0 -} - -; sameln: [0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0] diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif deleted file mode 100644 index 275a5e4411..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif +++ /dev/null @@ -1,27 +0,0 @@ -test binemit -set enable_simd -target x86_64 legacy haswell - -function %vselect_i8x16(b8x16, i8x16, i8x16) { -block0(v0: b8x16 [%xmm0], v1: i8x16 [%xmm3], v2: i8x16 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 10 eb - return -} - -function %vselect_i16x8(b16x8, i16x8, i16x8) { -block0(v0: b16x8 [%xmm0], v1: i16x8 [%xmm3], v2: i16x8 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 10 eb - return -} - -function %vselect_i32x4(b32x4, i32x4, i32x4) { -block0(v0: b32x4 [%xmm0], v1: i32x4 [%xmm3], v2: i32x4 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 14 eb - return -} - -function %vselect_i64x2(b64x2, i64x2, i64x2) { -block0(v0: b64x2 [%xmm0], v1: i64x2 [%xmm3], v2: i64x2 [%xmm5]): -[-, %xmm5] v3 = vselect v0, v1, v2 ; bin: 66 0f 38 15 eb - return -} diff --git 
a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif deleted file mode 100644 index 648b3f5584..0000000000 --- a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif +++ /dev/null @@ -1,45 +0,0 @@ -test legalizer -set enable_simd -target x86_64 legacy - -;; Test if vselect gets legalized if BLEND* instructions are not available - -function %vselect_i8x16(b8x16, i8x16, i8x16) -> i8x16 { -block0(v0: b8x16, v1: i8x16, v2: i8x16): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i8x16 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} - -function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 { -block0(v0: b16x8, v1: i16x8, v2: i16x8): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i16x8 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} - -function %vselect_i32x4(b32x4, i32x4, i32x4) -> i32x4 { -block0(v0: b32x4, v1: i32x4, v2: i32x4): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i32x4 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} - -function %vselect_i64x2(b64x2, i64x2, i64x2) -> i64x2 { -block0(v0: b64x2, v1: i64x2, v2: i64x2): - v3 = vselect v0, v1, v2 - ; check: v4 = raw_bitcast.i64x2 v0 - ; nextln: v5 = band v1, v4 - ; nextln: v6 = band_not v2, v4 - ; nextln: v3 = bor v5, v6 - return v3 -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif b/cranelift/filetests/filetests/isa/x86/stack-addr32.clif deleted file mode 100644 index f06b3ec0eb..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif +++ /dev/null @@ -1,33 +0,0 @@ -; binary emission of stack address instructions on i686. 
-test binemit -set opt_level=none -target i686 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr32.clif | llvm-mc -show-encoding -triple=i686 -; - -function %stack_addr() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - ss4 = explicit_slot 8, offset 0 - ss5 = explicit_slot 8, offset 1024 - -block0: -[-,%rcx] v0 = stack_addr.i32 ss0 ; bin: 8d 8c 24 00000808 -[-,%rcx] v1 = stack_addr.i32 ss1 ; bin: 8d 8c 24 00000408 -[-,%rcx] v2 = stack_addr.i32 ss2 ; bin: 8d 8c 24 00000008 -[-,%rcx] v3 = stack_addr.i32 ss3 ; bin: 8d 8c 24 00000000 -[-,%rcx] v4 = stack_addr.i32 ss4 ; bin: 8d 8c 24 00000808 -[-,%rcx] v5 = stack_addr.i32 ss5 ; bin: 8d 8c 24 00000c08 - -[-,%rcx] v20 = stack_addr.i32 ss4+1 ; bin: 8d 8c 24 00000809 -[-,%rcx] v21 = stack_addr.i32 ss4+2 ; bin: 8d 8c 24 0000080a -[-,%rcx] v22 = stack_addr.i32 ss4+2048 ; bin: 8d 8c 24 00001008 -[-,%rcx] v23 = stack_addr.i32 ss4-4096 ; bin: 8d 8c 24 fffff808 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif b/cranelift/filetests/filetests/isa/x86/stack-addr64.clif deleted file mode 100644 index 5b8d5d7ab7..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif +++ /dev/null @@ -1,45 +0,0 @@ -; binary emission of stack address instructions on x86-64. 
-test binemit -set opt_level=none -target x86_64 legacy haswell - -; The binary encodings can be verified with the command: -; -; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr64.clif | llvm-mc -show-encoding -triple=x86_64 -; - -function %stack_addr() { - ss0 = incoming_arg 8, offset 0 - ss1 = incoming_arg 1024, offset -1024 - ss2 = incoming_arg 1024, offset -2048 - ss3 = incoming_arg 8, offset -2056 - ss4 = explicit_slot 8, offset 0 - ss5 = explicit_slot 8, offset 1024 - -block0: -[-,%rcx] v0 = stack_addr.i64 ss0 ; bin: 48 8d 8c 24 00000808 -[-,%rcx] v1 = stack_addr.i64 ss1 ; bin: 48 8d 8c 24 00000408 -[-,%rcx] v2 = stack_addr.i64 ss2 ; bin: 48 8d 8c 24 00000008 -[-,%rcx] v3 = stack_addr.i64 ss3 ; bin: 48 8d 8c 24 00000000 -[-,%rcx] v4 = stack_addr.i64 ss4 ; bin: 48 8d 8c 24 00000808 -[-,%rcx] v5 = stack_addr.i64 ss5 ; bin: 48 8d 8c 24 00000c08 - -[-,%rcx] v20 = stack_addr.i64 ss4+1 ; bin: 48 8d 8c 24 00000809 -[-,%rcx] v21 = stack_addr.i64 ss4+2 ; bin: 48 8d 8c 24 0000080a -[-,%rcx] v22 = stack_addr.i64 ss4+2048 ; bin: 48 8d 8c 24 00001008 -[-,%rcx] v23 = stack_addr.i64 ss4-4096 ; bin: 48 8d 8c 24 fffff808 - -[-,%r8] v50 = stack_addr.i64 ss0 ; bin: 4c 8d 84 24 00000808 -[-,%r8] v51 = stack_addr.i64 ss1 ; bin: 4c 8d 84 24 00000408 -[-,%r8] v52 = stack_addr.i64 ss2 ; bin: 4c 8d 84 24 00000008 -[-,%r8] v53 = stack_addr.i64 ss3 ; bin: 4c 8d 84 24 00000000 -[-,%r8] v54 = stack_addr.i64 ss4 ; bin: 4c 8d 84 24 00000808 -[-,%r8] v55 = stack_addr.i64 ss5 ; bin: 4c 8d 84 24 00000c08 - -[-,%r8] v70 = stack_addr.i64 ss4+1 ; bin: 4c 8d 84 24 00000809 -[-,%r8] v71 = stack_addr.i64 ss4+2 ; bin: 4c 8d 84 24 0000080a -[-,%r8] v72 = stack_addr.i64 ss4+2048 ; bin: 4c 8d 84 24 00001008 -[-,%r8] v73 = stack_addr.i64 ss4-4096 ; bin: 4c 8d 84 24 fffff808 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif deleted file mode 100644 index 508fae04d2..0000000000 --- 
a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif +++ /dev/null @@ -1,21 +0,0 @@ -; legalization of stack load and store instructions on x86-64. -test legalizer -set opt_level=none -target x86_64 legacy haswell - -function %stack_load_and_store() { - ss0 = explicit_slot 8, offset 0 - -block0: - v0 = stack_load.i64 ss0 - -; check: v1 = stack_addr.i64 ss0 -; check: v0 = load.i64 notrap aligned v1 - - stack_store.i64 v0, ss0 - -; check: v2 = stack_addr.i64 ss0 -; check: store notrap aligned v0, v2 - - return -} diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif deleted file mode 100644 index 0a9f973fac..0000000000 --- a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif +++ /dev/null @@ -1,19 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i8) -> i8 { - ss0 = explicit_slot 1 - -block0(v0: i8): - stack_store v0, ss0 - ; check: v2 = stack_addr.i64 ss0 - ; nextln: v3 = uextend.i32 v0 - ; nextln: istore8 notrap aligned v3, v2 - - v1 = stack_load.i8 ss0 - ; check: v4 = stack_addr.i64 ss0 - ; nextln: v5 = uload8.i32 notrap aligned v4 - ; nextln: v1 = ireduce.i8 v5 - - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/struct-arg.clif b/cranelift/filetests/filetests/isa/x86/struct-arg.clif deleted file mode 100644 index 8358e8633a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/struct-arg.clif +++ /dev/null @@ -1,117 +0,0 @@ -test compile -set is_pic -target x86_64 legacy - -function u0:0(i64 sarg(64)) -> i8 system_v { -block0(v0: i64): - v1 = load.i8 v0 - return v1 -} - -; check: function u0:0(sarg_t sarg(64) [0], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v { -; nextln: ss0 = incoming_arg 64, offset 0 -; nextln: ss1 = incoming_arg 16, offset -16 - -; check: block0(v3: sarg_t [ss0], v5: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v5 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: 
[RexOp1spaddr_id#808d,%rax] v2 = stack_addr.i64 ss0 -; nextln: v0 -> v2 -; nextln: [RexOp2ld#4b6,%rax] v4 = uload8.i32 v2 -; nextln: [null#00,%rax] v1 = ireduce.i8 v4 -; nextln: [RexOp1popq#58,%rbp] v6 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1, v6 -; nextln: } - -function u0:1(i64, i64 sarg(64)) -> i8 system_v { -block0(v0: i64, v1: i64): - v2 = load.i8 v1 - return v2 -} - -; check: function u0:1(i64 [%rdi], sarg_t sarg(64) [0], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v { -; nextln: ss0 = incoming_arg 64, offset 0 -; nextln: ss1 = incoming_arg 16, offset -16 - -; check: block0(v0: i64 [%rdi], v4: sarg_t [ss0], v6: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v6 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1spaddr_id#808d,%rax] v3 = stack_addr.i64 ss0 -; nextln: v1 -> v3 -; nextln: [RexOp2ld#4b6,%rax] v5 = uload8.i32 v3 -; nextln: [null#00,%rax] v2 = ireduce.i8 v5 -; nextln: [RexOp1popq#58,%rbp] v7 = x86_pop.i64 -; nextln: [Op1ret#c3] return v2, v7 -; nextln: } - - -function u0:2(i64) -> i8 system_v { -fn1 = u0:0(i64 sarg(64)) -> i8 system_v - -block0(v0: i64): - v1 = call fn1(v0) - return v1 -} - -; check: function u0:2(i64 [%rdi], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v { -; nextln: ss0 = outgoing_arg 64, offset 0 -; nextln: ss1 = incoming_arg 16, offset -16 -; nextln: sig0 = (sarg_t sarg(64) [0]) -> i8 [%rax] system_v -; nextln: sig1 = (i64 [%rdi], i64 [%rsi], i64 [%rdx]) system_v -; nextln: fn1 = u0:0 sig0 -; nextln: fn2 = %Memcpy sig1 - -; check: block0(v0: i64 [%rdi], v5: i64 [%rbp]): -; nextln: [RexOp1pushq#50] x86_push v5 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1adjustsp_ib#d083] adjust_sp_down_imm 64 -; nextln: [RexOp1spaddr_id#808d,%rax] v2 = stack_addr.i64 ss0 -; nextln: [RexOp1pu_id#b8,%rcx] v3 = iconst.i64 64 -; nextln: [RexOp1rmov#8089] regmove v0, %rdi -> %rsi -; nextln: [RexOp1rmov#8089] regmove v2, %rax -> %rdi -; nextln: [RexOp1rmov#8089] regmove v3, %rcx 
-> %rdx -; nextln: [Op1call_plt_id#e8] call fn2(v2, v0, v3) -; nextln: [dummy_sarg_t#00,ss0] v4 = dummy_sarg_t -; nextln: [Op1call_plt_id#e8,%rax] v1 = call fn1(v4) -; nextln: [RexOp1adjustsp_ib#8083] adjust_sp_up_imm 64 -; nextln: [RexOp1popq#58,%rbp] v6 = x86_pop.i64 -; nextln: [Op1ret#c3] return v1, v6 -; nextln: } - -function u0:3(i64, i64) -> i8 system_v { -fn1 = u0:0(i64, i64 sarg(64)) -> i8 system_v - -block0(v0: i64, v1: i64): - v2 = call fn1(v0, v1) - return v2 -} - -; check: function u0:3(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%r15]) -> i8 [%rax], i64 fp [%rbp], i64 csr [%r15] system_v { -; nextln: ss0 = outgoing_arg 64, offset 0 -; nextln: ss1 = spill_slot 8, offset -32 -; nextln: ss2 = incoming_arg 24, offset -24 -; nextln: sig0 = (i64 [%rdi], sarg_t sarg(64) [0]) -> i8 [%rax] system_v -; nextln: sig1 = (i64 [%rdi], i64 [%rsi], i64 [%rdx]) system_v -; nextln: fn1 = u0:0 sig0 -; nextln: fn2 = %Memcpy sig1 - -; check: block0(v6: i64 [%rdi], v1: i64 [%rsi], v8: i64 [%rbp], v9: i64 [%r15]): -; nextln: [RexOp1pushq#50] x86_push v8 -; nextln: [RexOp1copysp#8089] copy_special %rsp -> %rbp -; nextln: [RexOp1pushq#50] x86_push v9 -; nextln: [RexOp1adjustsp_ib#d083] adjust_sp_down_imm 72 -; nextln: [RexOp1spillSib32#8089,ss1] v0 = spill v6 -; nextln: [RexOp1spaddr_id#808d,%rax] v3 = stack_addr.i64 ss0 -; nextln: [RexOp1pu_id#b8,%rcx] v4 = iconst.i64 64 -; nextln: [RexOp1rmov#8089] regmove v3, %rax -> %rdi -; nextln: [RexOp1rmov#8089] regmove v4, %rcx -> %rdx -; nextln: [Op1call_plt_id#e8] call fn2(v3, v1, v4) -; nextln: [dummy_sarg_t#00,ss0] v5 = dummy_sarg_t -; nextln: [RexOp1fillSib32#808b,%r15] v7 = fill v0 -; nextln: [RexOp1rmov#8089] regmove v7, %r15 -> %rdi -; nextln: [Op1call_plt_id#e8,%rax] v2 = call fn1(v7, v5) -; nextln: [RexOp1adjustsp_ib#8083] adjust_sp_up_imm 72 -; nextln: [RexOp1popq#58,%r15] v11 = x86_pop.i64 -; nextln: [RexOp1popq#58,%rbp] v10 = x86_pop.i64 -; nextln: [Op1ret#c3] return v2, v10, v11 -; nextln: } diff --git 
a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif deleted file mode 100644 index c5144bfd97..0000000000 --- a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif +++ /dev/null @@ -1,205 +0,0 @@ -test unwind -set opt_level=speed_and_size -set is_pic -target x86_64-linux legacy haswell - -; check the unwind information with a function with no args -function %no_args() system_v { -block0: - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000024 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x0000000000000006 (end_addr = 0x0000000000000006) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop - -; check a function with medium-sized stack alloc -function %medium_stack() system_v { - ss0 = explicit_slot 100000 -block0: - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; 
nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000024 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x000000000000001a (end_addr = 0x000000000000001a) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (21) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop - -; check a function with large-sized stack alloc -function %large_stack() system_v { - ss0 = explicit_slot 524288 -block0: - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000024 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x000000000000001a (end_addr = 0x000000000000001a) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (21) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_nop -; nextln: - -; check a function that has CSRs -function %lots_of_registers(i64, i64) system_v { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = 
load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - return -} -; sameln: 0x00000000: CIE -; nextln: length: 0x00000014 -; nextln: version: 0x01 -; nextln: code_align: 1 -; nextln: data_align: -8 -; nextln: ra_register: 0x10 -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: DW_CFA_offset (r16, 1) -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: DW_CFA_nop -; nextln: Instructions: Init State: -; nextln: -; nextln: -; nextln: 0x00000018: FDE -; nextln: length: 0x00000044 -; nextln: CIE_pointer: 0x00000000 -; nextln: start_addr: 0x0000000000000000 -; nextln: range_size: 0x0000000000000074 (end_addr = 0x0000000000000074) -; nextln: Instructions: -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_def_cfa_offset (16) -; nextln: DW_CFA_offset (r6, 2) -; nextln: DW_CFA_advance_loc (3) -; nextln: DW_CFA_def_cfa_register (r6) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_offset (r3, 3) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r12, 4) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r13, 5) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r14, 6) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_offset (r15, 7) -; nextln: DW_CFA_advance_loc (94) -; nextln: DW_CFA_same_value (r15) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_same_value (r14) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_same_value (r13) -; nextln: DW_CFA_advance_loc (2) -; nextln: DW_CFA_same_value (r12) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_same_value (r3) -; nextln: DW_CFA_advance_loc (1) -; nextln: DW_CFA_same_value (r6) -; nextln: DW_CFA_def_cfa (r7, 8) -; nextln: 
DW_CFA_nop diff --git a/cranelift/filetests/filetests/isa/x86/tls_elf.clif b/cranelift/filetests/filetests/isa/x86/tls_elf.clif deleted file mode 100644 index 2c957e0b9a..0000000000 --- a/cranelift/filetests/filetests/isa/x86/tls_elf.clif +++ /dev/null @@ -1,18 +0,0 @@ -test regalloc -set tls_model=elf_gd -target x86_64 legacy - -function u0:0(i32) -> i32, i64 { -gv0 = symbol colocated tls u1:0 - -block0(v0: i32): - ; check: block0(v2: i32 [%rdi]): - ; nextln: [RexOp1spillSib32#89,ss0] v0 = spill v2 - v1 = global_value.i64 gv0 - ; nextln: [elf_tls_get_addr#00,%rax] v1 = x86_elf_tls_get_addr gv0 - ; nextln: [RexOp1fillSib32#8b,%r15] v3 = fill v0 - return v0, v1 - ; nextln: [RexOp1rmov#8089] regmove v1, %rax -> %rdx - ; nextln: [RexOp1rmov#89] regmove v3, %r15 -> %rax - ; nextln: [Op1ret#c3] return v3, v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/tls_enc.clif b/cranelift/filetests/filetests/isa/x86/tls_enc.clif deleted file mode 100644 index d3481a15bf..0000000000 --- a/cranelift/filetests/filetests/isa/x86/tls_enc.clif +++ /dev/null @@ -1,11 +0,0 @@ -test binemit -target x86_64 legacy - -function u0:0() -> i64, i64 { -gv0 = symbol colocated tls u1:0 - -block0: - [-, %rax] v0 = x86_elf_tls_get_addr gv0 ; bin: 66 48 8d 3d ElfX86_64TlsGd(u1:0-4) 00000000 66 66 48 e8 CallPLTRel4(%ElfTlsGetAddr-4) 00000000 - [-, %rax] v1 = x86_macho_tls_get_addr gv0; bin: 48 8b 3d MachOX86_64Tlv(u1:0-4) 00000000 ff 17 - return v0, v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/tls_macho.clif b/cranelift/filetests/filetests/isa/x86/tls_macho.clif deleted file mode 100644 index 3747ac9f05..0000000000 --- a/cranelift/filetests/filetests/isa/x86/tls_macho.clif +++ /dev/null @@ -1,18 +0,0 @@ -test regalloc -set tls_model=macho -target x86_64 legacy - -function u0:0(i32) -> i32, i64 { -gv0 = symbol colocated tls u1:0 - -block0(v0: i32): - ; check: block0(v2: i32 [%rdi]): - ; nextln: [RexOp1spillSib32#89,ss0] v0 = spill v2 - v1 = global_value.i64 gv0 - ; nextln: 
[macho_tls_get_addr#00,%rax] v1 = x86_macho_tls_get_addr gv0 - ; nextln: [RexOp1fillSib32#8b,%r15] v3 = fill v0 - return v0, v1 - ; nextln: [RexOp1rmov#8089] regmove v1, %rax -> %rdx - ; nextln: [RexOp1rmov#89] regmove v3, %r15 -> %rax - ; nextln: [Op1ret#c3] return v3, v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif b/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif deleted file mode 100644 index 931b6e0aca..0000000000 --- a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif +++ /dev/null @@ -1,14 +0,0 @@ -test compile -target x86_64 legacy - -function u0:0(i8) -> i16 fast { -block0(v0: i8): - v1 = uextend.i16 v0 - return v1 -} - -function u0:1(i8) -> i16 fast { -block0(v0: i8): - v1 = sextend.i16 v0 - return v1 -} diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif deleted file mode 100644 index 13cf504d13..0000000000 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif +++ /dev/null @@ -1,255 +0,0 @@ -test compile -set opt_level=speed_and_size -set is_pic -target x86_64 legacy haswell - -; check if for one arg we use the right register -function %one_arg(i64) windows_fastcall { -block0(v0: i64): - return -} -; check: function %one_arg(i64 [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: i64 [%rbp]): -; nextln: x86_push v1 -; nextln: copy_special %rsp -> %rbp -; nextln: v2 = x86_pop.i64 -; nextln: return v2 -; nextln: } - -; check if we still use registers for 4 arguments -function %four_args(i64, i64, i64, i64) windows_fastcall { -block0(v0: i64, v1: i64, v2: i64, v3: i64): - return -} -; check: function %four_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 
[%r8], v3: i64 [%r9], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -; check if float arguments are passed through XMM registers -function %four_float_args(f64, f64, f64, f64) windows_fastcall { -block0(v0: f64, v1: f64, v2: f64, v3: f64): - return -} -; check: function %four_float_args(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -; check if we use stack space for > 4 arguments -function %five_args(i64, i64, i64, i64, i64) windows_fastcall { -block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64): - return -} -; check: function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 8, offset 32 -; nextln: ss1 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [ss0], v5: i64 [%rbp]): -; nextln: x86_push v5 -; nextln: copy_special %rsp -> %rbp -; nextln: v6 = x86_pop.i64 -; nextln: return v6 -; nextln: } - -; check that we preserve xmm6 and above if we're using them locally -function %float_callee_saves(f64, f64, f64, f64) windows_fastcall { -block0(v0: f64, v1: f64, v2: f64, v3: f64): -; explicitly use a callee-save register -[-, %xmm6] v4 = fadd v0, v1 -[-, %xmm7] v5 = fadd v0, v1 - return -} -; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 csr [%rsp], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall { -; nextln: ss0 = incoming_arg 48, offset -48 -; check: block0(v0: f64 
[%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rsp], v7: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]): -; nextln: x86_push v7 -; nextln: copy_special %rsp -> %rbp -; nextln: adjust_sp_down_imm 32 -; nextln: store notrap aligned v8, v6+16 -; nextln: store notrap aligned v9, v6 -; nextln: v11 = load.f64x2 notrap aligned v6+16 -; nextln: v12 = load.f64x2 notrap aligned v6 -; nextln: adjust_sp_up_imm 32 -; nextln: v10 = x86_pop.i64 -; nextln: return v10, v11, v12 -; nextln: } - -function %mixed_int_float(i64, f64, i64, f32) windows_fastcall { -block0(v0: i64, v1: f64, v2: i64, v3: f32): - return -} -; check: function %mixed_int_float(i64 [%rcx], f64 [%xmm1], i64 [%r8], f32 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: i64 [%rcx], v1: f64 [%xmm1], v2: i64 [%r8], v3: f32 [%xmm3], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: v5 = x86_pop.i64 -; nextln: return v5 -; nextln: } - -function %ret_val_float(f32, f64, i64, i64) -> f64 windows_fastcall { -block0(v0: f32, v1: f64, v2: i64, v3: i64): - return v1 -} -; check: function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall { -; nextln: ss0 = incoming_arg 16, offset -16 -; check: block0(v0: f32 [%xmm0], v1: f64 [%xmm1], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]): -; nextln: x86_push v4 -; nextln: copy_special %rsp -> %rbp -; nextln: regmove v1, %xmm1 -> %xmm0 -; nextln: v5 = x86_pop.i64 -; nextln: return v1, v5 -; nextln: } - -function %ret_val_i128(i64, i64) -> i128 windows_fastcall { -block0(v0: i64, v1: i64): - v2 = iconcat v0, v1 - return v2 -} -; check: function %ret_val_i128(i64 [%rdx], i64 [%r8], i64 sret [%rcx], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] windows_fastcall { - -; check if i128 is passed by reference -function %i128_arg(i128) windows_fastcall { -block0(v0: i128): - 
return -} -; check: function %i128_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { - -; check if vector types are passed by reference -function %i32x4_arg(i32x4) windows_fastcall { -block0(v0: i32x4): - return -} -; check: function %i32x4_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall { - -function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall { - fn0 = %foo(i64, i64, i64, i64) -> i64 windows_fastcall - fn1 = %foo2(i64, i64, i64, i64) -> i64 windows_fastcall -block0(v0: i64): - v1 = load.i64 v0+0 - v2 = load.i64 v0+8 - v3 = load.i64 v0+16 - v4 = load.i64 v0+24 - v5 = load.i64 v0+32 - v6 = load.i64 v0+40 - v7 = load.i64 v0+48 - v8 = load.i64 v0+56 - v9 = load.i64 v0+64 - v10 = call fn0(v1, v2, v3, v4) - store.i64 v1, v0+8 - store.i64 v2, v0+16 - store.i64 v3, v0+24 - store.i64 v4, v0+32 - store.i64 v5, v0+40 - store.i64 v6, v0+48 - store.i64 v7, v0+56 - store.i64 v8, v0+64 - store.i64 v9, v0+72 - return v10 -} -; check: function %internal_stack_a(i64 [%rcx], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 [%rax], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] windows_fastcall { -; nextln: ss0 = spill_slot 8, offset -56 -; nextln: ss1 = spill_slot 8, offset -64 -; nextln: ss2 = spill_slot 8, offset -72 -; nextln: ss3 = spill_slot 8, offset -80 -; nextln: ss4 = spill_slot 8, offset -88 -; nextln: ss5 = spill_slot 8, offset -96 -; nextln: ss6 = spill_slot 8, offset -104 -; nextln: ss7 = spill_slot 8, offset -112 -; nextln: ss8 = spill_slot 8, offset -120 -; nextln: ss9 = spill_slot 8, offset -128 -; nextln: ss10 = incoming_arg 48, offset -48 -; nextln: ss11 = explicit_slot 32, offset -160 -; nextln: sig0 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall -; nextln: sig1 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall -; nextln: fn0 = %foo sig0 -; nextln: fn1 = %foo2 sig1 -; check: 
block0(v11: i64 [%rcx], v52: i64 [%rbp], v53: i64 [%r12], v54: i64 [%r13], v55: i64 [%r14], v56: i64 [%r15]): -; nextln: x86_push v52 -; nextln: copy_special %rsp -> %rbp -; nextln: x86_push v53 -; nextln: x86_push v54 -; nextln: x86_push v55 -; nextln: x86_push v56 -; nextln: adjust_sp_down_imm 112 -; nextln: v0 = spill v11 -; nextln: v12 = copy_to_ssa.i64 %rcx -; nextln: v13 = load.i64 v12 -; nextln: v1 = spill v13 -; nextln: v14 = fill_nop v0 -; nextln: v15 = load.i64 v14+8 -; nextln: v2 = spill v15 -; nextln: v16 = fill_nop v0 -; nextln: v17 = load.i64 v16+16 -; nextln: v3 = spill v17 -; nextln: v18 = fill_nop v0 -; nextln: v19 = load.i64 v18+24 -; nextln: v4 = spill v19 -; nextln: v20 = fill_nop v0 -; nextln: v21 = load.i64 v20+32 -; nextln: v5 = spill v21 -; nextln: v22 = fill_nop v0 -; nextln: v23 = load.i64 v22+40 -; nextln: v6 = spill v23 -; nextln: v24 = fill_nop v0 -; nextln: v25 = load.i64 v24+48 -; nextln: v7 = spill v25 -; nextln: v26 = fill_nop v0 -; nextln: v27 = load.i64 v26+56 -; nextln: v8 = spill v27 -; nextln: v28 = fill_nop v0 -; nextln: v29 = load.i64 v28+64 -; nextln: v9 = spill v29 -; nextln: v30 = fill v1 -; nextln: v31 = fill v2 -; nextln: v32 = fill v3 -; nextln: v33 = fill v4 -; nextln: regmove v30, %r15 -> %rcx -; nextln: regmove v31, %r14 -> %rdx -; nextln: regmove v32, %r13 -> %r8 -; nextln: regmove v33, %r12 -> %r9 -; nextln: v10 = call fn0(v30, v31, v32, v33) -; nextln: v34 = fill v1 -; nextln: v35 = fill v0 -; nextln: store v34, v35+8 -; nextln: v36 = fill v2 -; nextln: v37 = fill_nop v0 -; nextln: store v36, v37+16 -; nextln: v38 = fill v3 -; nextln: v39 = fill_nop v0 -; nextln: store v38, v39+24 -; nextln: v40 = fill v4 -; nextln: v41 = fill_nop v0 -; nextln: store v40, v41+32 -; nextln: v42 = fill v5 -; nextln: v43 = fill_nop v0 -; nextln: store v42, v43+40 -; nextln: v44 = fill v6 -; nextln: v45 = fill_nop v0 -; nextln: store v44, v45+48 -; nextln: v46 = fill v7 -; nextln: v47 = fill_nop v0 -; nextln: store v46, v47+56 -; 
nextln: v48 = fill v8 -; nextln: v49 = fill_nop v0 -; nextln: store v48, v49+64 -; nextln: v50 = fill v9 -; nextln: v51 = fill_nop v0 -; nextln: store v50, v51+72 -; nextln: adjust_sp_up_imm 112 -; nextln: v61 = x86_pop.i64 -; nextln: v60 = x86_pop.i64 -; nextln: v59 = x86_pop.i64 -; nextln: v58 = x86_pop.i64 -; nextln: v57 = x86_pop.i64 -; nextln: return v10, v57, v58, v59, v60, v61 -; nextln: } diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif deleted file mode 100644 index 547e131fbd..0000000000 --- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif +++ /dev/null @@ -1,250 +0,0 @@ -test unwind -set opt_level=speed_and_size -set is_pic -target x86_64-windows legacy haswell - -; check the unwind information with a leaf function with no args -function %no_args_leaf() windows_fastcall { -block0: - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 4 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 1 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 - -; check the unwind information with a non-leaf function with no args -function %no_args() windows_fastcall { - fn0 = %foo() -block0: - call fn0() - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 8 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 2 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 8 -; nextln: op: SmallStackAlloc -; nextln: info: 3 - -; check a function with medium-sized stack alloc -function %medium_stack() windows_fastcall { - ss0 = explicit_slot 100000 -block0: - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 17 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind 
codes: 2 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 17 -; nextln: op: LargeStackAlloc -; nextln: info: 0 -; nextln: value: 12500 (u16) - -; check a function with large-sized stack alloc -function %large_stack() windows_fastcall { - ss0 = explicit_slot 524288 -block0: - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 17 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 2 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 17 -; nextln: op: LargeStackAlloc -; nextln: info: 1 -; nextln: value: 524288 (u32) - -function %fpr_with_function_call(i64, i64) windows_fastcall { - fn0 = %foo(f64, f64, i64, i64, i64) windows_fastcall; -block0(v0: i64, v1: i64): - v2 = load.f64 v0+0 - v3 = load.f64 v0+8 - v4 = load.i64 v0+16 - v15 = load.f64 v0+104 - v16 = load.f64 v0+112 - v17 = load.f64 v0+120 - v18 = load.f64 v0+128 - v19 = load.f64 v0+136 - v20 = load.f64 v0+144 - v21 = load.f64 v0+152 - v22 = load.f64 v0+160 - v23 = load.f64 v0+168 - call fn0(v2, v3, v4, v1, v1) - store.f64 v15, v1+104 - store.f64 v16, v1+112 - store.f64 v17, v1+120 - store.f64 v18, v1+128 - store.f64 v19, v1+136 - store.f64 v20, v1+144 - store.f64 v21, v1+152 - store.f64 v22, v1+160 - store.f64 v23, v1+168 - return -} -; Only check the first unwind code here because this test specifically looks to -; see that in a function that is not a leaf, a callee-save FPR is stored in an -; area that does not overlap either the callee's shadow space or stack argument -; space. 
-; -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 22 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 4 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 6 -; nextln: op: PushNonvolatileRegister -; nextln: info: 15 -; nextln: -; nextln: offset: 13 -; nextln: op: LargeStackAlloc -; nextln: info: 0 -; nextln: value: 23 (u16) -; nextln: -; nextln: offset: 22 -; nextln: op: SaveXmm128 -; nextln: info: 15 -; nextln: value: 10 (u16) - -; check a function that has CSRs -function %lots_of_registers(i64, i64) windows_fastcall { -block0(v0: i64, v1: i64): - v2 = load.i32 v0+0 - v3 = load.i32 v0+8 - v4 = load.i32 v0+16 - v5 = load.i32 v0+24 - v6 = load.i32 v0+32 - v7 = load.i32 v0+40 - v8 = load.i32 v0+48 - v9 = load.i32 v0+56 - v10 = load.i32 v0+64 - v11 = load.i32 v0+72 - v12 = load.i32 v0+80 - v13 = load.i32 v0+88 - v14 = load.i32 v0+96 - v15 = load.f64 v0+104 - v16 = load.f64 v0+112 - v17 = load.f64 v0+120 - v18 = load.f64 v0+128 - v19 = load.f64 v0+136 - v20 = load.f64 v0+144 - v21 = load.f64 v0+152 - v22 = load.f64 v0+160 - v23 = load.f64 v0+168 - store.i32 v2, v1+0 - store.i32 v3, v1+8 - store.i32 v4, v1+16 - store.i32 v5, v1+24 - store.i32 v6, v1+32 - store.i32 v7, v1+40 - store.i32 v8, v1+48 - store.i32 v9, v1+56 - store.i32 v10, v1+64 - store.i32 v11, v1+72 - store.i32 v12, v1+80 - store.i32 v13, v1+88 - store.i32 v14, v1+96 - store.f64 v15, v1+104 - store.f64 v16, v1+112 - store.f64 v17, v1+120 - store.f64 v18, v1+128 - store.f64 v19, v1+136 - store.f64 v20, v1+144 - store.f64 v21, v1+152 - store.f64 v22, v1+160 - store.f64 v23, v1+168 - return -} -; sameln: version: 1 -; nextln: flags: 0 -; nextln: prologue size: 35 -; nextln: frame register: 0 -; nextln: frame register offset: 0 -; nextln: unwind codes: 12 -; nextln: -; nextln: offset: 1 -; nextln: op: PushNonvolatileRegister -; nextln: info: 5 -; nextln: -; nextln: offset: 5 -; 
nextln: op: PushNonvolatileRegister -; nextln: info: 3 -; nextln: -; nextln: offset: 6 -; nextln: op: PushNonvolatileRegister -; nextln: info: 6 -; nextln: -; nextln: offset: 7 -; nextln: op: PushNonvolatileRegister -; nextln: info: 7 -; nextln: -; nextln: offset: 9 -; nextln: op: PushNonvolatileRegister -; nextln: info: 12 -; nextln: -; nextln: offset: 11 -; nextln: op: PushNonvolatileRegister -; nextln: info: 13 -; nextln: -; nextln: offset: 13 -; nextln: op: PushNonvolatileRegister -; nextln: info: 14 -; nextln: -; nextln: offset: 15 -; nextln: op: PushNonvolatileRegister -; nextln: info: 15 -; nextln: -; nextln: offset: 19 -; nextln: op: SmallStackAlloc -; nextln: info: 8 -; nextln: -; nextln: offset: 24 -; nextln: op: SaveXmm128 -; nextln: info: 6 -; nextln: value: 3 (u16) -; nextln: -; nextln: offset: 29 -; nextln: op: SaveXmm128 -; nextln: info: 7 -; nextln: value: 2 (u16) -; nextln: -; nextln: offset: 35 -; nextln: op: SaveXmm128 -; nextln: info: 8 -; nextln: value: 1 (u16) diff --git a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif b/cranelift/filetests/filetests/legalizer/bitrev-i128.clif deleted file mode 100644 index b58bf9bcb5..0000000000 --- a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif +++ /dev/null @@ -1,89 +0,0 @@ -test legalizer -target x86_64 legacy - -function %reverse_bits(i128) -> i128 { -block0(v0: i128): - v1 = bitrev.i128 v0 - return v1 -} - -; check: block0(v2: i64, v3: i64): -; check: v0 = iconcat v2, v3 -; check: v33 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa -; check: v6 = band v2, v33 -; check: v7 = ushr_imm v6, 1 -; check: v34 = iconst.i64 0x5555_5555_5555_5555 -; check: v8 = band v2, v34 -; check: v9 = ishl_imm v8, 1 -; check: v10 = bor v7, v9 -; check: v35 = iconst.i64 0xcccc_cccc_cccc_cccc -; check: v11 = band v10, v35 -; check: v12 = ushr_imm v11, 2 -; check: v36 = iconst.i64 0x3333_3333_3333_3333 -; check: v13 = band v10, v36 -; check: v14 = ishl_imm v13, 2 -; check: v15 = bor v12, v14 -; check: v37 = iconst.i64 
0xf0f0_f0f0_f0f0_f0f0 -; check: v16 = band v15, v37 -; check: v17 = ushr_imm v16, 4 -; check: v38 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f -; check: v18 = band v15, v38 -; check: v19 = ishl_imm v18, 4 -; check: v20 = bor v17, v19 -; check: v39 = iconst.i64 0xff00_ff00_ff00_ff00 -; check: v21 = band v20, v39 -; check: v22 = ushr_imm v21, 8 -; check: v40 = iconst.i64 0x00ff_00ff_00ff_00ff -; check: v23 = band v20, v40 -; check: v24 = ishl_imm v23, 8 -; check: v25 = bor v22, v24 -; check: v41 = iconst.i64 0xffff_0000_ffff_0000 -; check: v26 = band v25, v41 -; check: v27 = ushr_imm v26, 16 -; check: v42 = iconst.i64 0xffff_0000_ffff -; check: v28 = band v25, v42 -; check: v29 = ishl_imm v28, 16 -; check: v30 = bor v27, v29 -; check: v31 = ushr_imm v30, 32 -; check: v32 = ishl_imm v30, 32 -; check: v4 = bor v31, v32 -; check: v70 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa -; check: v43 = band v3, v70 -; check: v44 = ushr_imm v43, 1 -; check: v71 = iconst.i64 0x5555_5555_5555_5555 -; check: v45 = band v3, v71 -; check: v46 = ishl_imm v45, 1 -; check: v47 = bor v44, v46 -; check: v72 = iconst.i64 0xcccc_cccc_cccc_cccc -; check: v48 = band v47, v72 -; check: v49 = ushr_imm v48, 2 -; check: v73 = iconst.i64 0x3333_3333_3333_3333 -; check: v50 = band v47, v73 -; check: v51 = ishl_imm v50, 2 -; check: v52 = bor v49, v51 -; check: v74 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 -; check: v53 = band v52, v74 -; check: v54 = ushr_imm v53, 4 -; check: v75 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f -; check: v55 = band v52, v75 -; check: v56 = ishl_imm v55, 4 -; check: v57 = bor v54, v56 -; check: v76 = iconst.i64 0xff00_ff00_ff00_ff00 -; check: v58 = band v57, v76 -; check: v59 = ushr_imm v58, 8 -; check: v77 = iconst.i64 0x00ff_00ff_00ff_00ff -; check: v60 = band v57, v77 -; check: v61 = ishl_imm v60, 8 -; check: v62 = bor v59, v61 -; check: v78 = iconst.i64 0xffff_0000_ffff_0000 -; check: v63 = band v62, v78 -; check: v64 = ushr_imm v63, 16 -; check: v79 = iconst.i64 0xffff_0000_ffff -; check: v65 = band 
v62, v79 -; check: v66 = ishl_imm v65, 16 -; check: v67 = bor v64, v66 -; check: v68 = ushr_imm v67, 32 -; check: v69 = ishl_imm v67, 32 -; check: v5 = bor v68, v69 -; check: v1 = iconcat v5, v4 -; check: return v5, v4 diff --git a/cranelift/filetests/filetests/legalizer/bitrev.clif b/cranelift/filetests/filetests/legalizer/bitrev.clif deleted file mode 100644 index 6c9ead0fe2..0000000000 --- a/cranelift/filetests/filetests/legalizer/bitrev.clif +++ /dev/null @@ -1,206 +0,0 @@ -test legalizer -target x86_64 legacy - -function %reverse_bits_8(i8) -> i8 { -block0(v0: i8): - v1 = bitrev.i8 v0 - return v1 -} -; check: v16 = uextend.i32 v0 -; check: v17 = band_imm v16, 170 -; check: v2 = ireduce.i8 v17 -; check: v18 = uextend.i32 v2 -; check: v19 = ushr_imm v18, 1 -; check: v3 = ireduce.i8 v19 -; check: v20 = uextend.i32 v0 -; check: v21 = band_imm v20, 85 -; check: v4 = ireduce.i8 v21 -; check: v22 = uextend.i32 v4 -; check: v23 = ishl_imm v22, 1 -; check: v5 = ireduce.i8 v23 -; check: v24 = uextend.i32 v3 -; check: v25 = uextend.i32 v5 -; check: v26 = bor v24, v25 -; check: v6 = ireduce.i8 v26 -; check: v27 = uextend.i32 v6 -; check: v28 = band_imm v27, 204 -; check: v7 = ireduce.i8 v28 -; check: v29 = uextend.i32 v7 -; check: v30 = ushr_imm v29, 2 -; check: v8 = ireduce.i8 v30 -; check: v31 = uextend.i32 v6 -; check: v32 = band_imm v31, 51 -; check: v9 = ireduce.i8 v32 -; check: v33 = uextend.i32 v9 -; check: v34 = ishl_imm v33, 2 -; check: v10 = ireduce.i8 v34 -; check: v35 = uextend.i32 v8 -; check: v36 = uextend.i32 v10 -; check: v37 = bor v35, v36 -; check: v11 = ireduce.i8 v37 -; check: v38 = uextend.i32 v11 -; check: v39 = band_imm v38, 240 -; check: v12 = ireduce.i8 v39 -; check: v40 = uextend.i32 v12 -; check: v41 = ushr_imm v40, 4 -; check: v13 = ireduce.i8 v41 -; check: v42 = uextend.i32 v11 -; check: v43 = band_imm v42, 15 -; check: v14 = ireduce.i8 v43 -; check: v44 = uextend.i32 v14 -; check: v45 = ishl_imm v44, 4 -; check: v15 = ireduce.i8 v45 -; check: 
v46 = uextend.i32 v13 -; check: v47 = uextend.i32 v15 -; check: v48 = bor v46, v47 -; check: v1 = ireduce.i8 v48 -; check: return v1 - -function %reverse_bits_16(i16) -> i16 { -block0(v0: i16): - v1 = bitrev.i16 v0 - return v1 -} -; check: v21 = uextend.i32 v0 -; check: v22 = band_imm v21, 0xaaaa -; check: v2 = ireduce.i16 v22 -; check: v23 = uextend.i32 v2 -; check: v24 = ushr_imm v23, 1 -; check: v3 = ireduce.i16 v24 -; check: v25 = uextend.i32 v0 -; check: v26 = band_imm v25, 0x5555 -; check: v4 = ireduce.i16 v26 -; check: v27 = uextend.i32 v4 -; check: v28 = ishl_imm v27, 1 -; check: v5 = ireduce.i16 v28 -; check: v29 = uextend.i32 v3 -; check: v30 = uextend.i32 v5 -; check: v31 = bor v29, v30 -; check: v6 = ireduce.i16 v31 -; check: v32 = uextend.i32 v6 -; check: v33 = band_imm v32, 0xcccc -; check: v7 = ireduce.i16 v33 -; check: v34 = uextend.i32 v7 -; check: v35 = ushr_imm v34, 2 -; check: v8 = ireduce.i16 v35 -; check: v36 = uextend.i32 v6 -; check: v37 = band_imm v36, 0x3333 -; check: v9 = ireduce.i16 v37 -; check: v38 = uextend.i32 v9 -; check: v39 = ishl_imm v38, 2 -; check: v10 = ireduce.i16 v39 -; check: v40 = uextend.i32 v8 -; check: v41 = uextend.i32 v10 -; check: v42 = bor v40, v41 -; check: v11 = ireduce.i16 v42 -; check: v43 = uextend.i32 v11 -; check: v44 = band_imm v43, 0xf0f0 -; check: v12 = ireduce.i16 v44 -; check: v45 = uextend.i32 v12 -; check: v46 = ushr_imm v45, 4 -; check: v13 = ireduce.i16 v46 -; check: v47 = uextend.i32 v11 -; check: v48 = band_imm v47, 3855 -; check: v14 = ireduce.i16 v48 -; check: v49 = uextend.i32 v14 -; check: v50 = ishl_imm v49, 4 -; check: v15 = ireduce.i16 v50 -; check: v51 = uextend.i32 v13 -; check: v52 = uextend.i32 v15 -; check: v53 = bor v51, v52 -; check: v16 = ireduce.i16 v53 -; check: v54 = uextend.i32 v16 -; check: v55 = band_imm v54, 0xff00 -; check: v17 = ireduce.i16 v55 -; check: v56 = uextend.i32 v17 -; check: v57 = ushr_imm v56, 8 -; check: v18 = ireduce.i16 v57 -; check: v58 = uextend.i32 v16 -; 
check: v59 = band_imm v58, 255 -; check: v19 = ireduce.i16 v59 -; check: v60 = uextend.i32 v19 -; check: v61 = ishl_imm v60, 8 -; check: v20 = ireduce.i16 v61 -; check: v62 = uextend.i32 v18 -; check: v63 = uextend.i32 v20 -; check: v64 = bor v62, v63 -; check: v1 = ireduce.i16 v64 -; check: return v1 - -function %reverse_bits_32(i32) -> i32 { -block0(v0: i32): - v1 = bitrev.i32 v0 - return v1 -} -; check: v24 = iconst.i32 0xaaaa_aaaa -; check: v2 = band v0, v24 -; check: v3 = ushr_imm v2, 1 -; check: v4 = band_imm v0, 0x5555_5555 -; check: v5 = ishl_imm v4, 1 -; check: v6 = bor v3, v5 -; check: v25 = iconst.i32 0xcccc_cccc -; check: v7 = band v6, v25 -; check: v8 = ushr_imm v7, 2 -; check: v9 = band_imm v6, 0x3333_3333 -; check: v10 = ishl_imm v9, 2 -; check: v11 = bor v8, v10 -; check: v26 = iconst.i32 0xf0f0_f0f0 -; check: v12 = band v11, v26 -; check: v13 = ushr_imm v12, 4 -; check: v14 = band_imm v11, 0x0f0f_0f0f -; check: v15 = ishl_imm v14, 4 -; check: v16 = bor v13, v15 -; check: v27 = iconst.i32 0xff00_ff00 -; check: v17 = band v16, v27 -; check: v18 = ushr_imm v17, 8 -; check: v19 = band_imm v16, 0x00ff_00ff -; check: v20 = ishl_imm v19, 8 -; check: v21 = bor v18, v20 -; check: v22 = ushr_imm v21, 16 -; check: v23 = ishl_imm v21, 16 -; check: v1 = bor v22, v23 - - -function %reverse_bits_64(i64) -> i64 { -block0(v0: i64): - v1 = bitrev.i64 v0 - return v1 -} -; check: v29 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa -; check: v2 = band v0, v29 -; check: v3 = ushr_imm v2, 1 -; check: v30 = iconst.i64 0x5555_5555_5555_5555 -; check: v4 = band v0, v30 -; check: v5 = ishl_imm v4, 1 -; check: v6 = bor v3, v5 -; check: v31 = iconst.i64 0xcccc_cccc_cccc_cccc -; check: v7 = band v6, v31 -; check: v8 = ushr_imm v7, 2 -; check: v32 = iconst.i64 0x3333_3333_3333_3333 -; check: v9 = band v6, v32 -; check: v10 = ishl_imm v9, 2 -; check: v11 = bor v8, v10 -; check: v33 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0 -; check: v12 = band v11, v33 -; check: v13 = ushr_imm v12, 4 -; check: v34 = 
iconst.i64 0x0f0f_0f0f_0f0f_0f0f -; check: v14 = band v11, v34 -; check: v15 = ishl_imm v14, 4 -; check: v16 = bor v13, v15 -; check: v35 = iconst.i64 0xff00_ff00_ff00_ff00 -; check: v17 = band v16, v35 -; check: v18 = ushr_imm v17, 8 -; check: v36 = iconst.i64 0x00ff_00ff_00ff_00ff -; check: v19 = band v16, v36 -; check: v20 = ishl_imm v19, 8 -; check: v21 = bor v18, v20 -; check: v37 = iconst.i64 0xffff_0000_ffff_0000 -; check: v22 = band v21, v37 -; check: v23 = ushr_imm v22, 16 -; check: v38 = iconst.i64 0xffff_0000_ffff -; check: v24 = band v21, v38 -; check: v25 = ishl_imm v24, 16 -; check: v26 = bor v23, v25 -; check: v27 = ushr_imm v26, 32 -; check: v28 = ishl_imm v26, 32 -; check: v1 = bor v27, v28 diff --git a/cranelift/filetests/filetests/legalizer/br_table_cond.clif b/cranelift/filetests/filetests/legalizer/br_table_cond.clif deleted file mode 100644 index db464ae4d4..0000000000 --- a/cranelift/filetests/filetests/legalizer/br_table_cond.clif +++ /dev/null @@ -1,64 +0,0 @@ -test legalizer -set enable_probestack=false -set enable_jump_tables=false -target x86_64 legacy - -; Test that when jump_tables_enables is false, all jump tables are eliminated. 
-; regex: V=v\d+ -; regex: BB=block\d+ - -function u0:0(i64 vmctx) baldrdash_system_v { - gv0 = vmctx - gv1 = iadd_imm.i64 gv0, 48 - jt0 = jump_table [block2, block2, block7] - jt1 = jump_table [block8, block8] - -block0(v0: i64): - jump block5 - -block5: - v1 = global_value.i64 gv1 - v2 = load.i64 v1 - trapnz v2, interrupt - v3 = iconst.i32 0 - br_table v3, block3, jt0 -; check: block5: -; check: $(val0=$V) = iconst.i32 0 -; nextln: $(cmp0=$V) = icmp_imm eq $val0, 0 -; nextln: brnz $cmp0, block2 -; nextln: jump $(fail0=$BB) -; check: $fail0: -; nextln: $(cmp1=$V) = icmp_imm.i32 eq $val0, 1 -; nextln: brnz $cmp1, block2 -; nextln: jump $(fail1=$BB) -; check: $fail1: -; nextln: $(cmp2=$V) = icmp_imm.i32 eq $val0, 2 -; nextln: brnz $cmp2, block7 -; nextln: jump block3 - -block7: - v4 = iconst.i32 0 - br_table v4, block3, jt1 -; check: block7: -; check: $(val1=$V) = iconst.i32 0 -; nextln: $(cmp3=$V) = icmp_imm eq $val1, 0 -; nextln: brnz $cmp3, block8 -; nextln: jump $(fail3=$BB) -; check: $fail3: -; nextln: $(cmp4=$V) = icmp_imm.i32 eq $val1, 1 -; nextln: brnz $cmp4, block8 -; nextln: jump block3 - -block8: - jump block5 - -block3: - jump block2 - -block2: - jump block1 - -block1: - fallthrough_return -} -; not: jump_table diff --git a/cranelift/filetests/filetests/legalizer/empty_br_table.clif b/cranelift/filetests/filetests/legalizer/empty_br_table.clif deleted file mode 100644 index d320155470..0000000000 --- a/cranelift/filetests/filetests/legalizer/empty_br_table.clif +++ /dev/null @@ -1,17 +0,0 @@ -test legalizer -set enable_probestack=false -set enable_jump_tables=false -target x86_64 legacy - -function u0:0(i64) { - jt0 = jump_table [] - -block0(v0: i64): - br_table v0, block1, jt0 -; check: block0(v0: i64): -; nextln: jump block1 - -block1: - return -} -; not: jump_table diff --git a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif b/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif deleted file mode 100644 index 6d72cc6499..0000000000 
--- a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif +++ /dev/null @@ -1,23 +0,0 @@ -test legalizer -target x86_64 legacy - -function %icmp_imm_i128(i128) -> i8 { -block0(v0: i128): - v1 = icmp_imm.i128 eq v0, 1 - v2 = bint.i8 v1 - return v2 -} - -; check: function %icmp_imm_i128(i64 [%rdi], i64 [%rsi]) -> i8 [%rax] fast { -; nextln: block0(v3: i64, v4: i64): -; nextln: v7 -> v3 -; nextln: v8 -> v4 -; nextln: [-] v0 = iconcat v3, v4 -; nextln: [RexOp1pu_id#b8] v5 = iconst.i64 1 -; nextln: [RexOp1pu_id#b8] v6 = iconst.i64 0 -; nextln: [RexOp1icscc#8039] v9 = icmp eq v7, v5 -; nextln: [RexOp1icscc#8039] v10 = icmp eq v8, v6 -; nextln: [RexOp1rr#21] v1 = band v9, v10 -; nextln: [RexOp2urm_noflags#4b6] v2 = bint.i8 v1 -; nextln: [Op1ret#c3] return v2 -; nextln: } diff --git a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif b/cranelift/filetests/filetests/legalizer/pass_by_ref.clif deleted file mode 100644 index 141330cf01..0000000000 --- a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif +++ /dev/null @@ -1,31 +0,0 @@ -test legalizer -target x86_64 legacy - -function %legalize_entry(i128) -> i64 windows_fastcall { -block0(v0: i128): - v1, v2 = isplit v0 - return v2 -} -; check: function %legalize_entry(i64 ptr [%rcx]) -> i64 [%rax] windows_fastcall { -; nextln: block0(v3: i64): -; nextln: v4 = load.i64 v3 -; nextln: v1 -> v4 -; nextln: v5 = load.i64 v3+8 -; nextln: v2 -> v5 -; nextln: v0 = iconcat v4, v5 -; nextln: return v2 - -function %legalize_call() { - fn0 = %foo(i32x4) windows_fastcall -block0: - v0 = vconst.i32x4 [1 2 3 4] - call fn0(v0) - return -} -; check: ss0 = explicit_slot 16 -; check: sig0 = (i64 ptr [%rcx]) windows_fastcall -; check: v0 = vconst.i32x4 const0 -; nextln: v1 = stack_addr.i64 ss0 -; nextln: store v0, v1 -; nextln: v2 = func_addr.i64 fn0 -; nextln: call_indirect sig0, v2(v1) diff --git a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif deleted file 
mode 100644 index 8976ad0e25..0000000000 --- a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif +++ /dev/null @@ -1,21 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -function %foo() -> i128 { -block0: - v1 = iconst.i64 0x6400000042 - v2 = iconst.i64 0x7F10100042 - v3 = iconcat v1, v2 - v4 = popcnt.i128 v3 - return v4 -} - -; check: v1 = iconst.i64 0x0064_0000_0042 -; check: v2 = iconst.i64 0x007f_1010_0042 -; check: v3 = iconcat v1, v2 -; check: v5 = popcnt v1 -; check: v6 = popcnt v2 -; check: v7 = iadd v5, v6 -; check: v8 = iconst.i64 0 -; check: v4 = iconcat v7, v8 -; check: return v7, v8 diff --git a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif index bb21ec2553..a7c059f6c0 100644 --- a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif +++ b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif @@ -7,12 +7,15 @@ target x86_64 ;; we need to make an alias `v3 -> v2`. function %replace_inst_with_alias() -> i32 { + sig0 = (i32, i32) -> i32, i32 + fn0 = u0:0 sig0 + block0: v0 = iconst.i32 0 - v1, v2 = x86_smulx v0, v0 + v1, v2 = call fn0(v0, v0) v3 = isub v2, v0 ; check: v0 = iconst.i32 0 - ; nextln: v1, v2 = x86_smulx v0, v0 + ; nextln: v1, v2 = call fn0(v0, v0) ; nextln: v3 -> v2 return v3 } diff --git a/cranelift/filetests/filetests/postopt/basic.clif b/cranelift/filetests/filetests/postopt/basic.clif deleted file mode 100644 index 55a8d03738..0000000000 --- a/cranelift/filetests/filetests/postopt/basic.clif +++ /dev/null @@ -1,125 +0,0 @@ -test postopt -target aarch64 -target i686 legacy - -; Test that compare+branch sequences are folded effectively on x86. 
- -function %br_icmp(i32, i32) -> i32 { -block0(v0: i32, v1: i32): -[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 -[Op1t8jccd_long#85] brnz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v8 = iconst.i32 3 -[Op1ret#c3] return v8 -} -; sameln: function %br_icmp -; nextln: block0(v0: i32, v1: i32): -; nextln: v9 = ifcmp v0, v1 -; nextln: v2 = trueif slt v9 -; nextln: brif slt v9, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v8 = iconst.i32 3 -; nextln: return v8 -; nextln: } - -; Use brz instead of brnz, so the condition is inverted. - -function %br_icmp_inverse(i32, i32) -> i32 { -block0(v0: i32, v1: i32): -[DynRexOp1icscc#39,%rdx] v2 = icmp slt v0, v1 -[Op1t8jccd_long#84] brz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v8 = iconst.i32 3 -[Op1ret#c3] return v8 -} -; sameln: function %br_icmp_inverse -; nextln: block0(v0: i32, v1: i32): -; nextln: v9 = ifcmp v0, v1 -; nextln: v2 = trueif slt v9 -; nextln: brif sge v9, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v8 = iconst.i32 3 -; nextln: return v8 -; nextln: } - -; Use icmp_imm instead of icmp. - -function %br_icmp_imm(i32, i32) -> i32 { -block0(v0: i32, v1: i32): -[DynRexOp1icscc_ib#7083] v2 = icmp_imm slt v0, 2 -[Op1t8jccd_long#84] brz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v8 = iconst.i32 3 -[Op1ret#c3] return v8 -} -; sameln: function %br_icmp_imm -; nextln: block0(v0: i32, v1: i32): -; nextln: v9 = ifcmp_imm v0, 2 -; nextln: v2 = trueif slt v9 -; nextln: brif sge v9, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v8 = iconst.i32 3 -; nextln: return v8 -; nextln: } - -; Use fcmp instead of icmp. 
- -function %br_fcmp(f32, f32) -> f32 { -block0(v0: f32, v1: f32): -[Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1 -[Op1t8jccd_long#84] brz v2, block1 -[Op1jmpb#eb] jump block2 - -block2: -[Op1ret#c3] return v1 - -block1: -[Op1pu_id#b8,%rax] v18 = iconst.i32 0x40a8_0000 -[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18 -[Op1ret#c3] return v8 -} -; sameln: function %br_fcmp -; nextln: block0(v0: f32, v1: f32): -; nextln: v19 = ffcmp v0, v1 -; nextln: v2 = trueff gt v19 -; nextln: brff ule v19, block1 -; nextln: jump block2 -; nextln: -; nextln: block2: -; nextln: return v1 -; nextln: -; nextln: block1: -; nextln: v18 = iconst.i32 0x40a8_0000 -; nextln: v8 = bitcast.f32 v18 -; nextln: return v8 -; nextln: } diff --git a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif deleted file mode 100644 index acedb71087..0000000000 --- a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif +++ /dev/null @@ -1,94 +0,0 @@ -test postopt -target x86_64 legacy - -function %dual_loads(i64, i64) -> i64 { -block0(v0: i64, v1: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 - v4 = load.i64 v3 - v5 = uload8.i64 v3 - v6 = sload8.i64 v3 - v7 = uload16.i64 v3 - v8 = sload16.i64 v3 - v9 = uload32.i64 v3 - v10 = sload32.i64 v3 -[Op1ret#c3] return v10 -} - -; sameln: function %dual_loads -; nextln: block0(v0: i64, v1: i64): -; nextln: v3 = iadd v0, v1 -; nextln: v4 = load_complex.i64 v0+v1 -; nextln: v5 = uload8_complex.i64 v0+v1 -; nextln: v6 = sload8_complex.i64 v0+v1 -; nextln: v7 = uload16_complex.i64 v0+v1 -; nextln: v8 = sload16_complex.i64 v0+v1 -; nextln: v9 = uload32_complex v0+v1 -; nextln: v10 = sload32_complex v0+v1 -; nextln: return v10 -; nextln: } - -function %dual_loads2(i64, i64) -> i64 { -block0(v0: i64, v1: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 - v4 = load.i64 v3+1 - v5 = uload8.i64 v3+1 - v6 = sload8.i64 v3+1 - v7 = uload16.i64 v3+1 - v8 = sload16.i64 v3+1 - v9 = uload32.i64 v3+1 - v10 = sload32.i64 v3+1 
-[Op1ret#c3] return v10 -} - -; sameln: function %dual_loads2 -; nextln: block0(v0: i64, v1: i64): -; nextln: v3 = iadd v0, v1 -; nextln: v4 = load_complex.i64 v0+v1+1 -; nextln: v5 = uload8_complex.i64 v0+v1+1 -; nextln: v6 = sload8_complex.i64 v0+v1+1 -; nextln: v7 = uload16_complex.i64 v0+v1+1 -; nextln: v8 = sload16_complex.i64 v0+v1+1 -; nextln: v9 = uload32_complex v0+v1+1 -; nextln: v10 = sload32_complex v0+v1+1 -; nextln: return v10 -; nextln: } - -function %dual_stores(i64, i64, i64) { -block0(v0: i64, v1: i64, v2: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 -[RexOp1st#8089] store.i64 v2, v3 -[RexOp1st#88] istore8.i64 v2, v3 -[RexMp1st#189] istore16.i64 v2, v3 -[RexOp1st#89] istore32.i64 v2, v3 -[Op1ret#c3] return -} - -; sameln: function %dual_stores -; nextln: block0(v0: i64, v1: i64, v2: i64): -; nextln: v3 = iadd v0, v1 -; nextln: store_complex v2, v0+v1 -; nextln: istore8_complex v2, v0+v1 -; nextln: istore16_complex v2, v0+v1 -; nextln: istore32_complex v2, v0+v1 -; nextln: return -; nextln: } - -function %dual_stores2(i64, i64, i64) { -block0(v0: i64, v1: i64, v2: i64): -[RexOp1rr#8001] v3 = iadd v0, v1 -[RexOp1stDisp8#8089] store.i64 v2, v3+1 -[RexOp1stDisp8#88] istore8.i64 v2, v3+1 -[RexMp1stDisp8#189] istore16.i64 v2, v3+1 -[RexOp1stDisp8#89] istore32.i64 v2, v3+1 -[Op1ret#c3] return -} - -; sameln: function %dual_stores2 -; nextln: block0(v0: i64, v1: i64, v2: i64): -; nextln: v3 = iadd v0, v1 -; nextln: store_complex v2, v0+v1+1 -; nextln: istore8_complex v2, v0+v1+1 -; nextln: istore16_complex v2, v0+v1+1 -; nextln: istore32_complex v2, v0+v1+1 -; nextln: return -; nextln: } diff --git a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif b/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif deleted file mode 100644 index 84ddf3b884..0000000000 --- a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif +++ /dev/null @@ -1,32 +0,0 @@ -test postopt -target x86_64 legacy - -; Fold the immediate of an 
iadd_imm into an address offset. - -function u0:0(i64 vmctx) -> i64 { -block0(v0: i64): - v1 = iadd_imm.i64 v0, 16 -[RexOp1ldDisp8#808b] v2 = load.i64 notrap aligned v1 -[Op1ret#c3] return v2 -} - -; sameln: function u0:0(i64 vmctx) -> i64 fast { -; nextln: block0(v0: i64): -; nextln: v1 = iadd_imm v0, 16 -; nextln: [RexOp1ldDisp8#808b] v2 = load.i64 notrap aligned v0+16 -; nextln: [Op1ret#c3] return v2 -; nextln: } - -function u0:1(i64, i64 vmctx) { -block0(v3: i64, v0: i64): - v1 = iadd_imm.i64 v0, 16 -[RexOp1stDisp8#8089] store.i64 notrap aligned v3, v1 -[Op1ret#c3] return -} - -; sameln: function u0:1(i64, i64 vmctx) fast { -; nextln: block0(v3: i64, v0: i64): -; nextln: v1 = iadd_imm v0, 16 -; nextln: [RexOp1stDisp8#8089] store notrap aligned v3, v0+16 -; nextln: [Op1ret#c3] return -; nextln: } diff --git a/cranelift/filetests/filetests/regalloc/aliases.clif b/cranelift/filetests/filetests/regalloc/aliases.clif deleted file mode 100644 index e3dcfbad90..0000000000 --- a/cranelift/filetests/filetests/regalloc/aliases.clif +++ /dev/null @@ -1,35 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %value_aliases(i32, f32, i64 vmctx) baldrdash_system_v { - gv0 = vmctx - heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 - -block0(v0: i32, v1: f32, v2: i64): - v3 = iconst.i32 0 - jump block3(v3) - -block3(v4: i32): - v5 = heap_addr.i64 heap0, v4, 1 - v6 = load.f32 v5 - v7 -> v1 - v8 = fdiv v6, v7 - v9 = heap_addr.i64 heap0, v4, 1 - store v8, v9 - v10 = iconst.i32 4 - v11 = iadd v4, v10 - v12 -> v0 - v13 = icmp ult v11, v12 - v14 = bint.i32 v13 - brnz v14, block3(v11) - jump block4 - -block4: - jump block2 - -block2: - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/regalloc/basic.clif b/cranelift/filetests/filetests/regalloc/basic.clif deleted file mode 100644 index 48111253ae..0000000000 --- a/cranelift/filetests/filetests/regalloc/basic.clif +++ /dev/null @@ -1,80 +0,0 @@ -test regalloc 
- -; We can add more ISAs once they have defined encodings. -target riscv32 - -; regex: RX=%x\d+ - -function %add(i32, i32) { -block0(v1: i32, v2: i32): - v3 = iadd v1, v2 -; check: [R#0c,%x5] -; sameln: iadd - return -} - -; Function with a dead argument. -function %dead_arg(i32, i32) -> i32{ -block0(v1: i32, v2: i32): -; not: regmove -; check: return v1 - return v1 -} - -; Return a value from a different register. -function %move1(i32, i32) -> i32 { -block0(v1: i32, v2: i32): -; not: regmove -; check: regmove v2, %x11 -> %x10 -; nextln: return v2 - return v2 -} - -; Swap two registers. -function %swap(i32, i32) -> i32, i32 { -block0(v1: i32, v2: i32): -; not: regmove -; check: regmove v2, %x11 -> $(tmp=$RX) -; nextln: regmove v1, %x10 -> %x11 -; nextln: regmove v2, $tmp -> %x10 -; nextln: return v2, v1 - return v2, v1 -} - -; Return a block argument. -function %retblock(i32, i32) -> i32 { -block0(v1: i32, v2: i32): - brnz v1, block1(v1) - jump block1(v2) - -block1(v10: i32): - return v10 -} - -; Pass a block argument as a function argument. -function %callblock(i32, i32) -> i32 { - fn0 = %foo(i32) -> i32 - -block0(v1: i32, v2: i32): - brnz v1, block1(v1) - jump block1(v2) - -block1(v10: i32): - v11 = call fn0(v10) - return v11 -} - -; Pass a block argument as a jump argument. -function %jumpblock(i32, i32) -> i32 { - fn0 = %foo(i32) -> i32 - -block0(v1: i32, v2: i32): - brnz v1, block1(v1, v2) - jump block1(v2, v1) - -block1(v10: i32, v11: i32): - jump block2(v10, v11) - -block2(v20: i32, v21: i32): - return v21 -} diff --git a/cranelift/filetests/filetests/regalloc/coalesce.clif b/cranelift/filetests/filetests/regalloc/coalesce.clif deleted file mode 100644 index 48395da1b3..0000000000 --- a/cranelift/filetests/filetests/regalloc/coalesce.clif +++ /dev/null @@ -1,157 +0,0 @@ -test regalloc -target riscv32 - -; Test the coalescer. 
-; regex: V=v\d+ -; regex: WS=\s+ -; regex: LOC=%\w+ -; regex: BB=block\d+ - -; This function is already CSSA, so no copies should be inserted. -function %cssa(i32) -> i32 { -block0(v0: i32): - ; not: copy - ; v0 is used by the branch and passed as an arg - that's no conflict. - brnz v0, block1(v0) - jump block2 - -block2: - ; v0 is live across the branch above. That's no conflict. - v1 = iadd_imm v0, 7 - jump block1(v1) - -block1(v10: i32): - v11 = iadd_imm v10, 7 - return v11 -} - -function %trivial(i32) -> i32 { -block0(v0: i32): - ; check: brnz v0, $(splitEdge=$BB) - brnz v0, block1(v0) - jump block2 - -block2: - ; not: copy - v1 = iadd_imm v0, 7 - jump block1(v1) - - ; check: $splitEdge: - ; nextln: $(cp1=$V) = copy.i32 v0 - ; nextln: jump block1($cp1) - -block1(v10: i32): - ; Use v0 in the destination block causes a conflict. - v11 = iadd v10, v0 - return v11 -} - -; A value is used as an SSA argument twice in the same branch. -function %dualuse(i32) -> i32 { -block0(v0: i32): - ; check: brnz v0, $(splitEdge=$BB) - brnz v0, block1(v0, v0) - jump block2 - -block2: - v1 = iadd_imm v0, 7 - v2 = iadd_imm v1, 56 - jump block1(v1, v2) - - ; check: $splitEdge: - ; check: $(cp1=$V) = copy.i32 v0 - ; nextln: jump block1($cp1, v0) - -block1(v10: i32, v11: i32): - v12 = iadd v10, v11 - return v12 -} - -; Interference away from the branch -; The interference can be broken with a copy at either branch. -function %interference(i32) -> i32 { -block0(v0: i32): - ; not: copy - ; check: brnz v0, $(splitEdge=$BB) - ; not: copy - brnz v0, block1(v0) - jump block2 - -block2: - v1 = iadd_imm v0, 7 - ; v1 and v0 interfere here: - v2 = iadd_imm v0, 8 - ; check: $(cp0=$V) = copy v1 - ; check: jump block1($cp0) - jump block1(v1) - - ; check: $splitEdge: - ; not: copy - ; nextln: jump block1(v0) - -block1(v10: i32): - ; not: copy - v11 = iadd_imm v10, 7 - return v11 -} - -; A loop where one induction variable is used as a backedge argument. 
-function %fibonacci(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 1 - v2 = iconst.i32 2 - jump block1(v1, v2) - - ; check: $(splitEdge=$BB): - ; check: $(nv11b=$V) = copy.i32 v11 - ; not: copy - ; check: jump block1($nv11b, v12) - -block1(v10: i32, v11: i32): - ; v11 needs to be isolated because it interferes with v10. - ; check: block1(v10: i32 [$LOC], $(nv11a=$V): i32 [$LOC]) - ; check: v11 = copy $nv11a - v12 = iadd v10, v11 - v13 = icmp ult v12, v0 - ; check: brnz v13, $splitEdge - brnz v13, block1(v11, v12) - jump block2 - -block2: - return v12 -} - -; Function arguments passed on the stack aren't allowed to be part of a virtual -; register, at least for now. This is because the other values in the virtual -; register would need to be spilled to the incoming_arg stack slot which we treat -; as belonging to the caller. -function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { -; check: ss0 = incoming_arg 4 -; not: incoming_arg -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32): - ; check: fill v8 - ; not: v8 - jump block1(v8) - -block1(v10: i32): - v11 = iadd_imm v10, 1 - return v11 -} - -function %gvn_unremovable_phi(i32) system_v { -block0(v0: i32): - v2 = iconst.i32 0 - jump block2(v2, v0) - -block2(v3: i32, v4: i32): - brnz v3, block2(v3, v4) - jump block3 - -block3: - v5 = iconst.i32 1 - brnz v3, block2(v2, v5) - jump block4 - -block4: - return -} diff --git a/cranelift/filetests/filetests/regalloc/coalescing-207.clif b/cranelift/filetests/filetests/regalloc/coalescing-207.clif deleted file mode 100644 index c549cbd3d2..0000000000 --- a/cranelift/filetests/filetests/regalloc/coalescing-207.clif +++ /dev/null @@ -1,1527 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; Reported as https://github.com/bytecodealliance/cranelift/issues/207 -; -; The coalescer creates a virtual register with two interfering values. 
-function %pr207(i64 vmctx, i32, i32) -> i32 system_v { - gv1 = vmctx - gv0 = iadd_imm.i64 gv1, -8 - heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000 - sig0 = (i64 vmctx, i32, i32) -> i32 system_v - sig1 = (i64 vmctx, i32, i32, i32) -> i32 system_v - sig2 = (i64 vmctx, i32, i32, i32) -> i32 system_v - fn0 = u0:2 sig0 - fn1 = u0:0 sig1 - fn2 = u0:1 sig2 - -block0(v0: i64, v1: i32, v2: i32): - v3 = iconst.i32 0 - v4 = iconst.i32 0 - v5 = iconst.i32 0 - v6 = iconst.i32 0x4ffe - v7 = icmp uge v5, v6 - brz v7, block1 - jump block100 - -block100: - trap heap_oob - -block1: - v8 = uextend.i64 v5 - v9 = iadd_imm.i64 v0, -8 - v10 = load.i64 v9 - v11 = iadd v10, v8 - v12 = load.i32 v11+4 - v13 = iconst.i32 1056 - v14 = isub v12, v13 - v15 = iconst.i32 0x4ffe - v16 = icmp.i32 uge v4, v15 - brz v16, block2 - jump block101 - -block101: - trap heap_oob - -block2: - v17 = uextend.i64 v4 - v18 = iadd_imm.i64 v0, -8 - v19 = load.i64 v18 - v20 = iadd v19, v17 - store.i32 v14, v20+4 - v21 = iconst.i32 0x4ffe - v22 = icmp.i32 uge v2, v21 - brz v22, block3 - jump block102 - -block102: - trap heap_oob - -block3: - v23 = uextend.i64 v2 - v24 = iadd_imm.i64 v0, -8 - v25 = load.i64 v24 - v26 = iadd v25, v23 - v27 = sload8.i32 v26 - v28 = iconst.i32 255 - v29 = band v27, v28 - v30 = iconst.i32 0 - v31 = icmp eq v29, v30 - v32 = bint.i32 v31 - brnz v32, block90(v14, v1) - jump block103 - -block103: - v33 = call fn0(v0, v1, v27) - v34 = iconst.i32 0 - v35 = iconst.i32 0 - v36 = icmp eq v33, v35 - v37 = bint.i32 v36 - brnz v37, block90(v14, v34) - jump block104 - -block104: - v38 = iconst.i32 0x4ffe - v39 = icmp.i32 uge v2, v38 - brz v39, block4 - jump block105 - -block105: - trap heap_oob - -block4: - v40 = uextend.i64 v2 - v41 = iadd_imm.i64 v0, -8 - v42 = load.i64 v41 - v43 = iadd v42, v40 - v44 = uload8.i32 v43+1 - v45 = iconst.i32 0 - v46 = icmp eq v44, v45 - v47 = bint.i32 v46 - brnz v47, block56(v33, v14) - jump block106 - -block106: - v48 = iconst.i32 0x4ffe - v49 = 
icmp.i32 uge v33, v48 - brz v49, block5 - jump block107 - -block107: - trap heap_oob - -block5: - v50 = uextend.i64 v33 - v51 = iadd_imm.i64 v0, -8 - v52 = load.i64 v51 - v53 = iadd v52, v50 - v54 = uload8.i32 v53+1 - v55 = iconst.i32 0 - v56 = icmp eq v54, v55 - v57 = bint.i32 v56 - brnz v57, block90(v14, v34) - jump block108 - -block108: - v58 = iconst.i32 0x4ffe - v59 = icmp.i32 uge v2, v58 - brz v59, block6 - jump block109 - -block109: - trap heap_oob - -block6: - v60 = uextend.i64 v2 - v61 = iadd_imm.i64 v0, -8 - v62 = load.i64 v61 - v63 = iadd v62, v60 - v64 = uload8.i32 v63+2 - v65 = iconst.i32 0 - v66 = icmp eq v64, v65 - v67 = bint.i32 v66 - brnz v67, block42 - jump block110 - -block110: - v68 = iconst.i32 0x4ffe - v69 = icmp.i32 uge v33, v68 - brz v69, block7 - jump block111 - -block111: - trap heap_oob - -block7: - v70 = uextend.i64 v33 - v71 = iadd_imm.i64 v0, -8 - v72 = load.i64 v71 - v73 = iadd v72, v70 - v74 = uload8.i32 v73+2 - v75 = iconst.i32 0 - v76 = icmp eq v74, v75 - v77 = bint.i32 v76 - brnz v77, block90(v14, v34) - jump block112 - -block112: - v78 = iconst.i32 0x4ffe - v79 = icmp.i32 uge v2, v78 - brz v79, block8 - jump block113 - -block113: - trap heap_oob - -block8: - v80 = uextend.i64 v2 - v81 = iadd_imm.i64 v0, -8 - v82 = load.i64 v81 - v83 = iadd v82, v80 - v84 = uload8.i32 v83+3 - v85 = iconst.i32 0 - v86 = icmp eq v84, v85 - v87 = bint.i32 v86 - brnz v87, block46 - jump block114 - -block114: - v88 = iconst.i32 0x4ffe - v89 = icmp.i32 uge v33, v88 - brz v89, block9 - jump block115 - -block115: - trap heap_oob - -block9: - v90 = uextend.i64 v33 - v91 = iadd_imm.i64 v0, -8 - v92 = load.i64 v91 - v93 = iadd v92, v90 - v94 = uload8.i32 v93+3 - v95 = iconst.i32 0 - v96 = icmp eq v94, v95 - v97 = bint.i32 v96 - brnz v97, block90(v14, v34) - jump block116 - -block116: - v98 = iconst.i32 0x4ffe - v99 = icmp.i32 uge v2, v98 - brz v99, block10 - jump block117 - -block117: - trap heap_oob - -block10: - v100 = uextend.i64 v2 - v101 = iadd_imm.i64 
v0, -8 - v102 = load.i64 v101 - v103 = iadd v102, v100 - v104 = uload8.i32 v103+4 - v105 = iconst.i32 0 - v106 = icmp eq v104, v105 - v107 = bint.i32 v106 - brnz v107, block54 - jump block118 - -block118: - v108 = iconst.i32 1 - v109 = iadd.i32 v2, v108 - v110 = iconst.i32 1048 - v111 = iadd.i32 v14, v110 - v112 = iconst.i64 0 - v113 = iconst.i32 0x4ffe - v114 = icmp uge v111, v113 - brz v114, block11 - jump block119 - -block119: - trap heap_oob - -block11: - v115 = uextend.i64 v111 - v116 = iadd_imm.i64 v0, -8 - v117 = load.i64 v116 - v118 = iadd v117, v115 - store.i64 v112, v118 - v119 = iconst.i32 1040 - v120 = iadd.i32 v14, v119 - v121 = iconst.i64 0 - v122 = iconst.i32 0x4ffe - v123 = icmp uge v120, v122 - brz v123, block12 - jump block120 - -block120: - trap heap_oob - -block12: - v124 = uextend.i64 v120 - v125 = iadd_imm.i64 v0, -8 - v126 = load.i64 v125 - v127 = iadd v126, v124 - store.i64 v121, v127 - v128 = iconst.i64 0 - v129 = iconst.i32 0x4ffe - v130 = icmp.i32 uge v14, v129 - brz v130, block13 - jump block121 - -block121: - trap heap_oob - -block13: - v131 = uextend.i64 v14 - v132 = iadd_imm.i64 v0, -8 - v133 = load.i64 v132 - v134 = iadd v133, v131 - store.i64 v128, v134+1032 - v135 = iconst.i64 0 - v136 = iconst.i32 0x4ffe - v137 = icmp.i32 uge v14, v136 - brz v137, block14 - jump block122 - -block122: - trap heap_oob - -block14: - v138 = uextend.i64 v14 - v139 = iadd_imm.i64 v0, -8 - v140 = load.i64 v139 - v141 = iadd v140, v138 - store.i64 v135, v141+1024 - v142 = iconst.i32 -1 - jump block15(v142, v27) - -block15(v143: i32, v144: i32): - v145 = iadd.i32 v33, v143 - v146 = iconst.i32 1 - v147 = iadd v145, v146 - v148 = iconst.i32 0x4ffe - v149 = icmp uge v147, v148 - brz v149, block16 - jump block123 - -block123: - trap heap_oob - -block16: - v150 = uextend.i64 v147 - v151 = iadd_imm.i64 v0, -8 - v152 = load.i64 v151 - v153 = iadd v152, v150 - v154 = uload8.i32 v153 - v155 = iconst.i32 0 - v156 = icmp eq v154, v155 - v157 = bint.i32 v156 - brnz 
v157, block89(v14) - jump block124 - -block124: - v158 = iconst.i32 255 - v159 = band.i32 v144, v158 - v160 = iconst.i32 2 - v161 = ishl v159, v160 - v162 = iadd.i32 v14, v161 - v163 = iconst.i32 2 - v164 = iadd.i32 v143, v163 - v165 = iconst.i32 0x4ffe - v166 = icmp uge v162, v165 - brz v166, block17 - jump block125 - -block125: - trap heap_oob - -block17: - v167 = uextend.i64 v162 - v168 = iadd_imm.i64 v0, -8 - v169 = load.i64 v168 - v170 = iadd v169, v167 - store.i32 v164, v170 - v171 = iconst.i32 1024 - v172 = iadd.i32 v14, v171 - v173 = iconst.i32 3 - v174 = ushr.i32 v159, v173 - v175 = iconst.i32 28 - v176 = band v174, v175 - v177 = iadd v172, v176 - v178 = iconst.i32 0x4ffe - v179 = icmp uge v177, v178 - brz v179, block18 - jump block126 - -block126: - trap heap_oob - -block18: - v180 = uextend.i64 v177 - v181 = iadd_imm.i64 v0, -8 - v182 = load.i64 v181 - v183 = iadd v182, v180 - v184 = load.i32 v183 - v185 = iconst.i32 1 - v186 = iconst.i32 31 - v187 = band.i32 v144, v186 - v188 = ishl v185, v187 - v189 = bor v184, v188 - v190 = iconst.i32 0x4ffe - v191 = icmp.i32 uge v177, v190 - brz v191, block19 - jump block127 - -block127: - trap heap_oob - -block19: - v192 = uextend.i64 v177 - v193 = iadd_imm.i64 v0, -8 - v194 = load.i64 v193 - v195 = iadd v194, v192 - store.i32 v189, v195 - v196 = iadd.i32 v109, v143 - v197 = iconst.i32 1 - v198 = iadd.i32 v143, v197 - v199 = iconst.i32 1 - v200 = iadd v196, v199 - v201 = iconst.i32 0x4ffe - v202 = icmp uge v200, v201 - brz v202, block20 - jump block128 - -block128: - trap heap_oob - -block20: - v203 = uextend.i64 v200 - v204 = iadd_imm.i64 v0, -8 - v205 = load.i64 v204 - v206 = iadd v205, v203 - v207 = uload8.i32 v206 - brnz v207, block15(v198, v207) - jump block21 - -block21: - v208 = iconst.i32 -1 - v209 = iconst.i32 1 - v210 = iconst.i32 -1 - v211 = iconst.i32 1 - v212 = iconst.i32 1 - v213 = iadd.i32 v198, v212 - v214 = iconst.i32 2 - v215 = icmp ult v213, v214 - v216 = bint.i32 v215 - brnz v216, block38(v2, 
v211, v209, v210, v208, v198, v213, v33, v14) - jump block129 - -block129: - v217 = iconst.i32 -1 - v218 = iconst.i32 0 - v219 = iconst.i32 1 - v220 = iconst.i32 1 - v221 = iconst.i32 1 - v222 = copy.i32 v44 - jump block22(v217, v221, v44, v220, v218, v219, v213, v222, v198, v33, v14) - -block22(v223: i32, v224: i32, v225: i32, v226: i32, v227: i32, v228: i32, v229: i32, v230: i32, v231: i32, v232: i32, v233: i32): - v234 = copy v228 - v235 = iadd v223, v224 - v236 = iadd.i32 v2, v235 - v237 = iconst.i32 0x4ffe - v238 = icmp uge v236, v237 - brz v238, block23 - jump block130 - -block130: - trap heap_oob - -block23: - v239 = uextend.i64 v236 - v240 = iadd_imm.i64 v0, -8 - v241 = load.i64 v240 - v242 = iadd v241, v239 - v243 = uload8.i32 v242 - v244 = iconst.i32 255 - v245 = band.i32 v225, v244 - v246 = icmp ne v243, v245 - v247 = bint.i32 v246 - brnz v247, block24 - jump block131 - -block131: - v248 = icmp.i32 ne v224, v226 - v249 = bint.i32 v248 - brnz v249, block25 - jump block132 - -block132: - v250 = iadd.i32 v227, v226 - v251 = iconst.i32 1 - jump block27(v251, v250, v223, v226) - -block24: - v252 = icmp.i32 ule v243, v245 - v253 = bint.i32 v252 - brnz v253, block26 - jump block133 - -block133: - v254 = isub.i32 v234, v223 - v255 = iconst.i32 1 - jump block27(v255, v234, v223, v254) - -block25: - v256 = iconst.i32 1 - v257 = iadd.i32 v224, v256 - v258 = copy.i32 v227 - jump block27(v257, v258, v223, v226) - -block26: - v259 = iconst.i32 1 - v260 = iconst.i32 1 - v261 = iadd.i32 v227, v260 - v262 = iconst.i32 1 - v263 = copy.i32 v227 - jump block27(v259, v261, v263, v262) - -block27(v264: i32, v265: i32, v266: i32, v267: i32): - v268 = iadd v264, v265 - v269 = icmp uge v268, v229 - v270 = bint.i32 v269 - brnz v270, block29 - jump block134 - -block134: - v271 = iadd.i32 v2, v268 - v272 = iconst.i32 0x4ffe - v273 = icmp uge v271, v272 - brz v273, block28 - jump block135 - -block135: - trap heap_oob - -block28: - v274 = uextend.i64 v271 - v275 = iadd_imm.i64 v0, -8 
- v276 = load.i64 v275 - v277 = iadd v276, v274 - v278 = uload8.i32 v277 - v279 = copy.i32 v265 - jump block22(v266, v264, v278, v267, v279, v268, v229, v230, v231, v232, v233) - -block29: - jump block30 - -block30: - v280 = iconst.i32 -1 - v281 = iconst.i32 0 - v282 = iconst.i32 1 - v283 = iconst.i32 1 - v284 = iconst.i32 1 - jump block31(v280, v284, v230, v283, v281, v282, v229, v267, v266, v231, v232, v233) - -block31(v285: i32, v286: i32, v287: i32, v288: i32, v289: i32, v290: i32, v291: i32, v292: i32, v293: i32, v294: i32, v295: i32, v296: i32): - v297 = copy v290 - v298 = iadd v285, v286 - v299 = iadd.i32 v2, v298 - v300 = iconst.i32 0x4ffe - v301 = icmp uge v299, v300 - brz v301, block32 - jump block136 - -block136: - trap heap_oob - -block32: - v302 = uextend.i64 v299 - v303 = iadd_imm.i64 v0, -8 - v304 = load.i64 v303 - v305 = iadd v304, v302 - v306 = uload8.i32 v305 - v307 = iconst.i32 255 - v308 = band.i32 v287, v307 - v309 = icmp ne v306, v308 - v310 = bint.i32 v309 - brnz v310, block33 - jump block137 - -block137: - v311 = icmp.i32 ne v286, v288 - v312 = bint.i32 v311 - brnz v312, block34 - jump block138 - -block138: - v313 = iadd.i32 v289, v288 - v314 = iconst.i32 1 - jump block36(v314, v313, v285, v288) - -block33: - v315 = icmp.i32 uge v306, v308 - v316 = bint.i32 v315 - brnz v316, block35 - jump block139 - -block139: - v317 = isub.i32 v297, v285 - v318 = iconst.i32 1 - jump block36(v318, v297, v285, v317) - -block34: - v319 = iconst.i32 1 - v320 = iadd.i32 v286, v319 - v321 = copy.i32 v289 - jump block36(v320, v321, v285, v288) - -block35: - v322 = iconst.i32 1 - v323 = iconst.i32 1 - v324 = iadd.i32 v289, v323 - v325 = iconst.i32 1 - v326 = copy.i32 v289 - jump block36(v322, v324, v326, v325) - -block36(v327: i32, v328: i32, v329: i32, v330: i32): - v331 = iadd v327, v328 - v332 = icmp uge v331, v291 - v333 = bint.i32 v332 - brnz v333, block38(v2, v330, v292, v329, v293, v294, v291, v295, v296) - jump block140 - -block140: - v334 = iadd.i32 v2, 
v331 - v335 = iconst.i32 0x4ffe - v336 = icmp uge v334, v335 - brz v336, block37 - jump block141 - -block141: - trap heap_oob - -block37: - v337 = uextend.i64 v334 - v338 = iadd_imm.i64 v0, -8 - v339 = load.i64 v338 - v340 = iadd v339, v337 - v341 = uload8.i32 v340 - v342 = copy.i32 v328 - jump block31(v329, v327, v341, v330, v342, v331, v291, v292, v293, v294, v295, v296) - -block38(v343: i32, v344: i32, v345: i32, v346: i32, v347: i32, v348: i32, v349: i32, v350: i32, v351: i32): - v352 = iconst.i32 1 - v353 = iadd v346, v352 - v354 = iconst.i32 1 - v355 = iadd v347, v354 - v356 = icmp ugt v353, v355 - v357 = bint.i32 v356 - brnz v357, block39(v344) - jump block142 - -block142: - v358 = copy v345 - jump block39(v358) - -block39(v359: i32): - v360 = iadd.i32 v343, v359 - brnz.i32 v357, block40(v346) - jump block143 - -block143: - v361 = copy.i32 v347 - jump block40(v361) - -block40(v362: i32): - v363 = iconst.i32 1 - v364 = iadd v362, v363 - v365 = call fn1(v0, v343, v360, v364) - v366 = iconst.i32 0 - v367 = icmp eq v365, v366 - v368 = bint.i32 v367 - brnz v368, block63 - jump block144 - -block144: - v369 = iconst.i32 1 - v370 = iadd v362, v369 - v371 = isub.i32 v348, v370 - v372 = iconst.i32 1 - v373 = iadd v371, v372 - v374 = icmp ugt v362, v373 - v375 = bint.i32 v374 - v376 = copy v362 - brnz v375, block41(v376) - jump block145 - -block145: - v377 = copy v373 - jump block41(v377) - -block41(v378: i32): - v379 = iconst.i32 1 - v380 = iadd v378, v379 - v381 = iconst.i32 0 - jump block64(v380, v381) - -block42: - v382 = iconst.i32 8 - v383 = ishl.i32 v29, v382 - v384 = bor v383, v44 - v385 = iconst.i32 0x4ffe - v386 = icmp.i32 uge v33, v385 - brz v386, block43 - jump block146 - -block146: - trap heap_oob - -block43: - v387 = uextend.i64 v33 - v388 = iadd_imm.i64 v0, -8 - v389 = load.i64 v388 - v390 = iadd v389, v387 - v391 = uload8.i32 v390 - jump block44(v391, v54, v33) - -block44(v392: i32, v393: i32, v394: i32): - v395 = iconst.i32 8 - v396 = ishl v392, v395 - 
v397 = iconst.i32 0xff00 - v398 = band v396, v397 - v399 = iconst.i32 255 - v400 = band v393, v399 - v401 = bor v398, v400 - v402 = icmp eq v401, v384 - v403 = bint.i32 v402 - brnz v403, block56(v394, v14) - jump block147 - -block147: - v404 = iconst.i32 2 - v405 = iadd v394, v404 - v406 = iconst.i32 1 - v407 = iadd v394, v406 - v408 = iconst.i32 0x4ffe - v409 = icmp uge v405, v408 - brz v409, block45 - jump block148 - -block148: - trap heap_oob - -block45: - v410 = uextend.i64 v405 - v411 = iadd_imm.i64 v0, -8 - v412 = load.i64 v411 - v413 = iadd v412, v410 - v414 = uload8.i32 v413 - brnz v414, block44(v401, v414, v407) - jump block90(v14, v34) - -block46: - v415 = iconst.i32 8 - v416 = ishl.i32 v74, v415 - v417 = iconst.i32 16 - v418 = ishl.i32 v54, v417 - v419 = bor v416, v418 - v420 = iconst.i32 0x4ffe - v421 = icmp.i32 uge v33, v420 - brz v421, block47 - jump block149 - -block149: - trap heap_oob - -block47: - v422 = uextend.i64 v33 - v423 = iadd_imm.i64 v0, -8 - v424 = load.i64 v423 - v425 = iadd v424, v422 - v426 = uload8.i32 v425 - v427 = iconst.i32 24 - v428 = ishl v426, v427 - v429 = bor.i32 v419, v428 - v430 = iconst.i32 16 - v431 = ishl.i32 v44, v430 - v432 = iconst.i32 24 - v433 = ishl.i32 v29, v432 - v434 = bor v431, v433 - v435 = iconst.i32 8 - v436 = ishl.i32 v64, v435 - v437 = bor v434, v436 - v438 = icmp eq v429, v437 - v439 = bint.i32 v438 - brnz v439, block56(v33, v14) - jump block48(v33, v429) - -block48(v440: i32, v441: i32): - v442 = iconst.i32 1 - v443 = iadd v440, v442 - v444 = iconst.i32 3 - v445 = iadd v440, v444 - v446 = iconst.i32 0x4ffe - v447 = icmp uge v445, v446 - brz v447, block49 - jump block150 - -block150: - trap heap_oob - -block49: - v448 = uextend.i64 v445 - v449 = iadd_imm.i64 v0, -8 - v450 = load.i64 v449 - v451 = iadd v450, v448 - v452 = uload8.i32 v451 - v453 = iconst.i32 0 - v454 = icmp eq v452, v453 - v455 = bint.i32 v454 - brnz v455, block51(v14) - jump block151 - -block151: - v456 = bor.i32 v441, v452 - v457 = 
iconst.i32 8 - v458 = ishl v456, v457 - v459 = icmp ne v458, v437 - v460 = bint.i32 v459 - v461 = copy.i32 v443 - brnz v460, block48(v461, v458) - jump block50 - -block50: - jump block51(v14) - -block51(v462: i32): - v463 = iconst.i32 0 - v464 = iconst.i32 1056 - v465 = iadd v462, v464 - v466 = iconst.i32 0x4ffe - v467 = icmp uge v463, v466 - brz v467, block52 - jump block152 - -block152: - trap heap_oob - -block52: - v468 = uextend.i64 v463 - v469 = iadd_imm.i64 v0, -8 - v470 = load.i64 v469 - v471 = iadd v470, v468 - store.i32 v465, v471+4 - v472 = iconst.i32 0 - brnz.i32 v452, block53(v443) - jump block153 - -block153: - v473 = copy v472 - jump block53(v473) - -block53(v474: i32): - return v474 - -block54: - v475 = iconst.i32 8 - v476 = ishl.i32 v74, v475 - v477 = iconst.i32 16 - v478 = ishl.i32 v54, v477 - v479 = bor v476, v478 - v480 = bor v479, v94 - v481 = iconst.i32 0x4ffe - v482 = icmp.i32 uge v33, v481 - brz v482, block55 - jump block154 - -block154: - trap heap_oob - -block55: - v483 = uextend.i64 v33 - v484 = iadd_imm.i64 v0, -8 - v485 = load.i64 v484 - v486 = iadd v485, v483 - v487 = uload8.i32 v486 - v488 = iconst.i32 24 - v489 = ishl v487, v488 - v490 = bor.i32 v480, v489 - v491 = iconst.i32 16 - v492 = ishl.i32 v44, v491 - v493 = iconst.i32 24 - v494 = ishl.i32 v29, v493 - v495 = bor v492, v494 - v496 = iconst.i32 8 - v497 = ishl.i32 v64, v496 - v498 = bor v495, v497 - v499 = bor v498, v84 - v500 = icmp ne v490, v499 - v501 = bint.i32 v500 - brnz v501, block57 - jump block56(v33, v14) - -block56(v502: i32, v503: i32): - v504 = copy v502 - jump block90(v503, v504) - -block57: - jump block58(v33, v490) - -block58(v505: i32, v506: i32): - v507 = iconst.i32 4 - v508 = iadd v505, v507 - v509 = iconst.i32 1 - v510 = iadd v505, v509 - v511 = iconst.i32 0x4ffe - v512 = icmp uge v508, v511 - brz v512, block59 - jump block155 - -block155: - trap heap_oob - -block59: - v513 = uextend.i64 v508 - v514 = iadd_imm.i64 v0, -8 - v515 = load.i64 v514 - v516 = iadd 
v515, v513 - v517 = uload8.i32 v516 - v518 = iconst.i32 0 - v519 = icmp eq v517, v518 - v520 = bint.i32 v519 - brnz v520, block61(v14) - jump block156 - -block156: - v521 = iconst.i32 8 - v522 = ishl.i32 v506, v521 - v523 = bor v522, v517 - v524 = icmp ne v523, v499 - v525 = bint.i32 v524 - brnz v525, block58(v510, v523) - jump block60 - -block60: - jump block61(v14) - -block61(v526: i32): - v527 = iconst.i32 0 - brnz.i32 v517, block62(v510) - jump block157 - -block157: - v528 = copy v527 - jump block62(v528) - -block62(v529: i32): - v530 = copy v529 - jump block90(v526, v530) - -block63: - v531 = isub.i32 v348, v359 - v532 = iconst.i32 1 - v533 = iadd v531, v532 - jump block64(v359, v533) - -block64(v534: i32, v535: i32): - v536 = iconst.i32 1 - v537 = iadd.i32 v343, v536 - v538 = iconst.i32 0 - v539 = isub v538, v362 - v540 = iconst.i32 63 - v541 = bor.i32 v349, v540 - v542 = isub.i32 v348, v534 - v543 = iconst.i32 1 - v544 = iadd v542, v543 - v545 = iconst.i32 0 - v546 = copy.i32 v350 - jump block65(v350, v546, v349, v541, v348, v351, v544, v534, v545, v535, v343, v364, v537, v539, v362) - -block65(v547: i32, v548: i32, v549: i32, v550: i32, v551: i32, v552: i32, v553: i32, v554: i32, v555: i32, v556: i32, v557: i32, v558: i32, v559: i32, v560: i32, v561: i32): - v562 = copy v556 - v563 = isub v547, v548 - v564 = icmp uge v563, v549 - v565 = bint.i32 v564 - brnz v565, block67(v547) - jump block158 - -block158: - v566 = iconst.i32 0 - v567 = call fn2(v0, v547, v566, v550) - brnz v567, block66 - jump block159 - -block159: - v568 = iadd v547, v550 - jump block67(v568) - -block66: - v569 = isub.i32 v567, v548 - v570 = icmp ult v569, v549 - v571 = bint.i32 v570 - brnz v571, block89(v552) - jump block160 - -block160: - v572 = copy.i32 v567 - jump block67(v572) - -block67(v573: i32): - v574 = iconst.i32 1 - v575 = iadd.i32 v548, v551 - v576 = iconst.i32 0x4ffe - v577 = icmp uge v575, v576 - brz v577, block68 - jump block161 - -block161: - trap heap_oob - -block68: - 
v578 = uextend.i64 v575 - v579 = iadd_imm.i64 v0, -8 - v580 = load.i64 v579 - v581 = iadd v580, v578 - v582 = uload8.i32 v581 - v583 = iconst.i32 31 - v584 = band v582, v583 - v585 = ishl.i32 v574, v584 - v586 = iconst.i32 1024 - v587 = iadd.i32 v552, v586 - v588 = iconst.i32 3 - v589 = ushr v582, v588 - v590 = iconst.i32 28 - v591 = band v589, v590 - v592 = iadd v587, v591 - v593 = iconst.i32 0x4ffe - v594 = icmp uge v592, v593 - brz v594, block69 - jump block162 - -block162: - trap heap_oob - -block69: - v595 = uextend.i64 v592 - v596 = iadd_imm.i64 v0, -8 - v597 = load.i64 v596 - v598 = iadd v597, v595 - v599 = load.i32 v598 - v600 = band.i32 v585, v599 - v601 = iconst.i32 0 - v602 = icmp eq v600, v601 - v603 = bint.i32 v602 - brnz v603, block74 - jump block163 - -block163: - v604 = iconst.i32 2 - v605 = ishl.i32 v582, v604 - v606 = iadd.i32 v552, v605 - v607 = iconst.i32 0x4ffe - v608 = icmp uge v606, v607 - brz v608, block70 - jump block164 - -block164: - trap heap_oob - -block70: - v609 = uextend.i64 v606 - v610 = iadd_imm.i64 v0, -8 - v611 = load.i64 v610 - v612 = iadd v611, v609 - v613 = load.i32 v612 - v614 = isub.i32 v551, v613 - v615 = iconst.i32 -1 - v616 = icmp eq v614, v615 - v617 = bint.i32 v616 - brnz v617, block75 - jump block165 - -block165: - v618 = iconst.i32 1 - v619 = iadd v614, v618 - v620 = icmp ult v619, v554 - v621 = bint.i32 v620 - v622 = copy.i32 v553 - brnz v621, block71(v622) - jump block166 - -block166: - v623 = copy v619 - jump block71(v623) - -block71(v624: i32): - v625 = copy v624 - brnz.i32 v555, block72(v625) - jump block72(v619) - -block72(v626: i32): - brnz.i32 v562, block73(v626) - jump block73(v619) - -block73(v627: i32): - v628 = copy.i32 v554 - v629 = copy.i32 v562 - jump block87(v548, v627, v573, v549, v550, v551, v552, v553, v628, v629, v557, v558, v559, v560, v561) - -block74: - v630 = copy.i32 v549 - v631 = copy.i32 v554 - v632 = copy.i32 v562 - jump block87(v548, v630, v573, v549, v550, v551, v552, v553, v631, v632, 
v557, v558, v559, v560, v561) - -block75: - v633 = icmp.i32 ugt v558, v555 - v634 = bint.i32 v633 - v635 = copy.i32 v558 - brnz v634, block76(v635) - jump block167 - -block167: - v636 = copy.i32 v555 - jump block76(v636) - -block76(v637: i32): - v638 = iadd.i32 v557, v637 - v639 = iconst.i32 0x4ffe - v640 = icmp uge v638, v639 - brz v640, block77 - jump block168 - -block168: - trap heap_oob - -block77: - v641 = uextend.i64 v638 - v642 = iadd_imm.i64 v0, -8 - v643 = load.i64 v642 - v644 = iadd v643, v641 - v645 = uload8.i32 v644 - v646 = iconst.i32 0 - v647 = icmp eq v645, v646 - v648 = bint.i32 v647 - brnz v648, block82(v548, v549, v551, v552) - jump block169 - -block169: - v649 = iadd.i32 v548, v637 - v650 = iadd.i32 v559, v637 - v651 = iadd.i32 v560, v637 - jump block78(v645, v649, v651, v650) - -block78(v652: i32, v653: i32, v654: i32, v655: i32): - v656 = iconst.i32 255 - v657 = band v652, v656 - v658 = iconst.i32 0x4ffe - v659 = icmp uge v653, v658 - brz v659, block79 - jump block170 - -block170: - trap heap_oob - -block79: - v660 = uextend.i64 v653 - v661 = iadd_imm.i64 v0, -8 - v662 = load.i64 v661 - v663 = iadd v662, v660 - v664 = uload8.i32 v663 - v665 = icmp.i32 ne v657, v664 - v666 = bint.i32 v665 - v667 = copy.i32 v554 - v668 = copy.i32 v562 - brnz v666, block87(v548, v654, v573, v549, v550, v551, v552, v553, v667, v668, v557, v558, v559, v560, v561) - jump block171 - -block171: - v669 = iconst.i32 1 - v670 = iadd.i32 v653, v669 - v671 = iconst.i32 1 - v672 = iadd.i32 v654, v671 - v673 = iconst.i32 0x4ffe - v674 = icmp.i32 uge v655, v673 - brz v674, block80 - jump block172 - -block172: - trap heap_oob - -block80: - v675 = uextend.i64 v655 - v676 = iadd_imm.i64 v0, -8 - v677 = load.i64 v676 - v678 = iadd v677, v675 - v679 = uload8.i32 v678 - v680 = iconst.i32 1 - v681 = iadd.i32 v655, v680 - brnz v679, block78(v679, v670, v672, v681) - jump block81 - -block81: - jump block82(v548, v549, v551, v552) - -block82(v682: i32, v683: i32, v684: i32, v685: i32): 
- v686 = icmp.i32 ule v558, v555 - v687 = bint.i32 v686 - brnz v687, block90(v685, v682) - jump block173 - -block173: - v688 = copy.i32 v561 - jump block83(v688) - -block83(v689: i32): - v690 = iadd.i32 v557, v689 - v691 = iconst.i32 0x4ffe - v692 = icmp uge v690, v691 - brz v692, block84 - jump block174 - -block174: - trap heap_oob - -block84: - v693 = uextend.i64 v690 - v694 = iadd_imm.i64 v0, -8 - v695 = load.i64 v694 - v696 = iadd v695, v693 - v697 = uload8.i32 v696 - v698 = iadd.i32 v682, v689 - v699 = iconst.i32 0x4ffe - v700 = icmp uge v698, v699 - brz v700, block85 - jump block175 - -block175: - trap heap_oob - -block85: - v701 = uextend.i64 v698 - v702 = iadd_imm.i64 v0, -8 - v703 = load.i64 v702 - v704 = iadd v703, v701 - v705 = uload8.i32 v704 - v706 = icmp.i32 ne v697, v705 - v707 = bint.i32 v706 - brnz v707, block86 - jump block176 - -block176: - v708 = icmp.i32 ule v689, v555 - v709 = bint.i32 v708 - v710 = iconst.i32 -1 - v711 = iadd.i32 v689, v710 - v712 = iconst.i32 0 - v713 = icmp eq v709, v712 - v714 = bint.i32 v713 - brnz v714, block83(v711) - jump block90(v685, v682) - -block86: - v715 = copy.i32 v554 - v716 = copy.i32 v562 - jump block88(v682, v554, v573, v683, v550, v684, v685, v553, v715, v562, v716, v557, v558, v559, v560, v561) - -block87(v717: i32, v718: i32, v719: i32, v720: i32, v721: i32, v722: i32, v723: i32, v724: i32, v725: i32, v726: i32, v727: i32, v728: i32, v729: i32, v730: i32, v731: i32): - v732 = copy v718 - v733 = iconst.i32 0 - jump block88(v717, v732, v719, v720, v721, v722, v723, v724, v725, v733, v726, v727, v728, v729, v730, v731) - -block88(v734: i32, v735: i32, v736: i32, v737: i32, v738: i32, v739: i32, v740: i32, v741: i32, v742: i32, v743: i32, v744: i32, v745: i32, v746: i32, v747: i32, v748: i32, v749: i32): - v750 = iadd v734, v735 - v751 = copy v742 - v752 = copy v743 - v753 = copy v744 - jump block65(v736, v750, v737, v738, v739, v740, v741, v751, v752, v753, v745, v746, v747, v748, v749) - -block89(v754: 
i32): - v755 = iconst.i32 0 - jump block90(v754, v755) - -block90(v756: i32, v757: i32): - v758 = iconst.i32 0 - v759 = iconst.i32 1056 - v760 = iadd v756, v759 - v761 = iconst.i32 0x4ffe - v762 = icmp uge v758, v761 - brz v762, block91 - jump block177 - -block177: - trap heap_oob - -block91: - v763 = uextend.i64 v758 - v764 = iadd_imm.i64 v0, -8 - v765 = load.i64 v764 - v766 = iadd v765, v763 - store.i32 v760, v766+4 - jump block92(v757) - -block92(v767: i32): - return v767 -} - -; Same problem from musl.wasm. -function %musl(f64 [%xmm0], i64 vmctx [%rdi]) -> f64 [%xmm0] system_v { - gv0 = vmctx - heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000 - sig0 = (f64 [%xmm0], i32 [%rdi], i64 vmctx [%rsi]) -> f64 [%xmm0] system_v - fn0 = u0:517 sig0 - -block0(v0: f64, v1: i64): - v3 = iconst.i64 0 - v4 = iconst.i32 0 - v131 = iconst.i64 0 - v5 = bitcast.f64 v131 - v6 = iconst.i32 0 - v7 = iconst.i32 0 - v8 = iconst.i32 0 - v132 = uextend.i64 v8 - v133 = iadd_imm v1, 0 - v134 = load.i64 v133 - v9 = iadd v134, v132 - v10 = load.i32 v9+4 - v11 = iconst.i32 16 - v12 = isub v10, v11 - v135 = uextend.i64 v7 - v136 = iadd_imm v1, 0 - v137 = load.i64 v136 - v13 = iadd v137, v135 - store v12, v13+4 - v14 = bitcast.i64 v0 - v15 = iconst.i64 63 - v16 = ushr v14, v15 - v17 = ireduce.i32 v16 - v18 = iconst.i64 32 - v19 = ushr v14, v18 - v20 = ireduce.i32 v19 - v21 = iconst.i32 0x7fff_ffff - v22 = band v20, v21 - v23 = iconst.i32 0x4086_232b - v24 = icmp ult v22, v23 - v25 = bint.i32 v24 - brnz v25, block10 - jump block178 - -block178: - v26 = iconst.i64 0x7fff_ffff_ffff_ffff - v27 = band v14, v26 - v28 = iconst.i64 0x7ff0_0000_0000_0000 - v29 = icmp ule v27, v28 - v30 = bint.i32 v29 - brnz v30, block9 - jump block2(v12, v0) - -block10: - v31 = iconst.i32 0x3fd6_2e43 - v32 = icmp.i32 ult v22, v31 - v33 = bint.i32 v32 - brnz v33, block8 - jump block179 - -block179: - v34 = iconst.i32 0x3ff0_a2b2 - v35 = icmp.i32 uge v22, v34 - v36 = bint.i32 v35 - brnz v36, 
block6 - jump block180 - -block180: - v37 = iconst.i32 1 - v38 = bxor.i32 v17, v37 - v39 = isub v38, v17 - jump block5(v0, v39) - -block9: - v138 = iconst.i64 0x4086_2e42_fefa_39ef - v40 = bitcast.f64 v138 - v41 = fcmp ge v40, v0 - v42 = bint.i32 v41 - v139 = fcmp.f64 uno v0, v0 - v140 = fcmp.f64 one v0, v0 - v43 = bor v139, v140 - v44 = bint.i32 v43 - v45 = bor v42, v44 - brnz v45, block7 - jump block181 - -block181: - v141 = iconst.i64 0x7fe0_0000_0000_0000 - v46 = bitcast.f64 v141 - v47 = fmul.f64 v0, v46 - jump block2(v12, v47) - -block8: - v48 = iconst.i32 0x3e30_0000 - v49 = icmp.i32 ule v22, v48 - v50 = bint.i32 v49 - brnz v50, block3 - jump block182 - -block182: - v51 = iconst.i32 0 - v142 = iconst.i64 0 - v52 = bitcast.f64 v142 - v178 = copy.f64 v0 - jump block4(v0, v178, v52, v51) - -block7: - v143 = iconst.i64 0xc086_232b_dd7a_bcd2 - v53 = bitcast.f64 v143 - v54 = fcmp.f64 ge v0, v53 - v55 = bint.i32 v54 - v56 = bor v55, v44 - brnz v56, block6 - jump block183 - -block183: - v144 = iconst.i64 0xb6a0_0000_0000_0000 - v57 = bitcast.f64 v144 - v58 = fdiv v57, v0 - v59 = fdemote.f32 v58 - v145 = uextend.i64 v12 - v146 = iadd_imm.i64 v1, 0 - v147 = load.i64 v146 - v60 = iadd v147, v145 - store v59, v60+12 - v148 = iconst.i64 0 - v61 = bitcast.f64 v148 - v149 = iconst.i64 0xc087_4910_d52d_3051 - v62 = bitcast.f64 v149 - v63 = fcmp gt v62, v0 - v64 = bint.i32 v63 - brnz v64, block2(v12, v61) - jump block6 - -block6: - v150 = iconst.i64 0x3ff7_1547_652b_82fe - v66 = bitcast.f64 v150 - v67 = fmul.f64 v0, v66 - v69 = iconst.i32 3 - v70 = ishl.i32 v17, v69 - v71 = iconst.i32 5040 - v72 = iadd v70, v71 - v151 = uextend.i64 v72 - v152 = iadd_imm.i64 v1, 0 - v153 = load.i64 v152 - v73 = iadd v153, v151 - v74 = load.f64 v73 - v75 = fadd v67, v74 - v76 = x86_cvtt2si.i32 v75 - v158 = iconst.i32 0x8000_0000 - v154 = icmp ne v76, v158 - brnz v154, block11 - jump block184 - -block184: - v155 = fcmp uno v75, v75 - brz v155, block12 - jump block185 - -block185: - trap 
bad_toint - -block12: - v159 = iconst.i64 0xc1e0_0000_0020_0000 - v156 = bitcast.f64 v159 - v157 = fcmp ge v156, v75 - brz v157, block13 - jump block186 - -block186: - trap int_ovf - -block13: - jump block11 - -block11: - jump block5(v0, v76) - -block5(v77: f64, v78: i32): - v79 = fcvt_from_sint.f64 v78 - v160 = iconst.i64 0xbfe6_2e42_fee0_0000 - v80 = bitcast.f64 v160 - v81 = fmul v79, v80 - v82 = fadd v77, v81 - v161 = iconst.i64 0x3dea_39ef_3579_3c76 - v83 = bitcast.f64 v161 - v84 = fmul v79, v83 - v85 = fsub v82, v84 - jump block4(v82, v85, v84, v78) - -block4(v86: f64, v87: f64, v108: f64, v113: i32): - v88 = fmul v87, v87 - v162 = iconst.i64 0x3e66_3769_72be_a4d0 - v89 = bitcast.f64 v162 - v90 = fmul v88, v89 - v163 = iconst.i64 0xbeeb_bd41_c5d2_6bf1 - v91 = bitcast.f64 v163 - v92 = fadd v90, v91 - v93 = fmul v88, v92 - v164 = iconst.i64 0x3f11_566a_af25_de2c - v94 = bitcast.f64 v164 - v95 = fadd v93, v94 - v96 = fmul v88, v95 - v165 = iconst.i64 0xbf66_c16c_16be_bd93 - v97 = bitcast.f64 v165 - v98 = fadd v96, v97 - v99 = fmul v88, v98 - v166 = iconst.i64 0x3fc5_5555_5555_553e - v100 = bitcast.f64 v166 - v101 = fadd v99, v100 - v102 = fmul v88, v101 - v103 = fsub v87, v102 - v104 = fmul v87, v103 - v167 = iconst.i64 0x4000_0000_0000_0000 - v105 = bitcast.f64 v167 - v106 = fsub v105, v103 - v107 = fdiv v104, v106 - v109 = fsub v107, v108 - v110 = fadd v86, v109 - v168 = iconst.i64 0x3ff0_0000_0000_0000 - v111 = bitcast.f64 v168 - v112 = fadd v110, v111 - v169 = iconst.i32 0 - v114 = icmp eq v113, v169 - v115 = bint.i32 v114 - brnz v115, block2(v12, v112) - jump block187 - -block187: - v116 = call fn0(v112, v113, v1) - jump block2(v12, v116) - -block3: - v170 = iconst.i64 0x7fe0_0000_0000_0000 - v117 = bitcast.f64 v170 - v118 = fadd.f64 v0, v117 - v171 = uextend.i64 v12 - v172 = iadd_imm.i64 v1, 0 - v173 = load.i64 v172 - v119 = iadd v173, v171 - store v118, v119 - v174 = iconst.i64 0x3ff0_0000_0000_0000 - v120 = bitcast.f64 v174 - v121 = fadd.f64 v0, v120 - 
jump block2(v12, v121) - -block2(v123: i32, v130: f64): - v122 = iconst.i32 0 - v127 = iconst.i32 16 - v128 = iadd v123, v127 - v175 = uextend.i64 v122 - v176 = iadd_imm.i64 v1, 0 - v177 = load.i64 v176 - v129 = iadd v177, v175 - store v128, v129+4 - jump block1(v130) - -block1(v2: f64): - return v2 -} diff --git a/cranelift/filetests/filetests/regalloc/coalescing-216.clif b/cranelift/filetests/filetests/regalloc/coalescing-216.clif deleted file mode 100644 index 4c9b27d6b0..0000000000 --- a/cranelift/filetests/filetests/regalloc/coalescing-216.clif +++ /dev/null @@ -1,87 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; Reported as https://github.com/bytecodealliance/cranelift/issues/216 from the Binaryen fuzzer. -; -; The (old) coalescer creates a virtual register with two identical values. -function %pr216(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v { -block0(v0: i32, v1: i64): - v3 = iconst.i64 0 - v5 = iconst.i32 0 - brz v5, block3(v3) - jump block4(v3, v3) - -block4(v11: i64, v29: i64): - v6 = iconst.i32 0 - brz v6, block14 - jump block15 - -block15: - v9 = iconst.i32 -17 - v12 = iconst.i32 0xffff_ffff_ffff_8000 - jump block9(v12) - -block9(v10: i32): - brnz v10, block8(v9, v11, v11) - jump block16 - -block16: - brz.i32 v9, block13 - jump block17 - -block17: - v13 = iconst.i32 0 - brnz v13, block6(v11, v11) - jump block18 - -block18: - v14 = iconst.i32 0 - brz v14, block12 - jump block11 - -block12: - jump block4(v11, v11) - -block11: - jump block10(v11) - -block13: - v15 = iconst.i64 1 - jump block10(v15) - -block10(v21: i64): - v16 = iconst.i32 0 - brnz v16, block6(v21, v11) - jump block19 - -block19: - v17 = iconst.i32 0xffff_ffff_ffff_9f35 - jump block8(v17, v21, v11) - -block8(v8: i32, v23: i64, v28: i64): - jump block7(v8, v23, v28) - -block14: - v18 = iconst.i32 0 - jump block7(v18, v11, v29) - -block7(v7: i32, v22: i64, v27: i64): - jump block6(v22, v27) - -block6(v20: i64, v25: i64): - v19 = iconst.i32 0xffc7 - brnz v19, block4(v20, 
v25) - jump block5 - -block5: - jump block3(v25) - -block3(v24: i64): - jump block2(v24) - -block2(v4: i64): - jump block1(v4) - -block1(v2: i64): - return v2 -} diff --git a/cranelift/filetests/filetests/regalloc/coloring-227.clif b/cranelift/filetests/filetests/regalloc/coloring-227.clif deleted file mode 100644 index d47a905637..0000000000 --- a/cranelift/filetests/filetests/regalloc/coloring-227.clif +++ /dev/null @@ -1,115 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) system_v { - gv0 = vmctx - heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000 - - block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64): -[RexOp1pu_id#b8] v5 = iconst.i32 0 -[RexOp1pu_id#b8] v6 = iconst.i32 0 -[RexOp1tjccb#74] brz v6, block10 -[Op1jmpb#eb] jump block3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3) - - block3(v15: i32, v17: i32, v25: i32, v31: i32, v40: i32, v47: i32, v54: i32, v61: i32, v68: i32, v75: i32): -[Op1jmpb#eb] jump block6 - - block6: -[RexOp1pu_id#b8] v8 = iconst.i32 0 -[RexOp1tjccb#75] brnz v8, block5 -[Op1jmpb#eb] jump block20 - - block20: -[RexOp1pu_id#b8] v9 = iconst.i32 0 -[RexOp1pu_id#b8] v11 = iconst.i32 0 -[DynRexOp1icscc#39] v12 = icmp.i32 eq v15, v11 -[RexOp2urm_noflags#4b6] v13 = bint.i32 v12 -[DynRexOp1rr#21] v14 = band v9, v13 -[RexOp1tjccb#75] brnz v14, block6 -[Op1jmpb#eb] jump block7 - - block7: -[RexOp1tjccb#74] brz.i32 v17, block8 -[Op1jmpb#eb] jump block17 - - block17: -[RexOp1pu_id#b8] v18 = iconst.i32 0 -[RexOp1tjccb#74] brz v18, block9 -[Op1jmpb#eb] jump block16 - - block16: -[RexOp1pu_id#b8] v21 = iconst.i32 0 -[RexOp1umr#89] v79 = uextend.i64 v5 -[RexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v81 = load.i64 v80 -[RexOp1rr#8001] v22 = iadd v81, v79 -[RexMp1st#189] istore16 v21, v22 -[Op1jmpb#eb] jump block9 - - block9: -[Op1jmpb#eb] jump block8 - - block8: -[RexOp1pu_id#b8] v27 = iconst.i32 3 -[RexOp1pu_id#b8] v28 = iconst.i32 4 
-[DynRexOp1rr#09] v35 = bor.i32 v31, v13 -[RexOp1tjccb#75] brnz v35, block15(v27) -[Op1jmpb#eb] jump block15(v28) - - block15(v36: i32): -[Op1jmpb#eb] jump block3(v25, v36, v25, v31, v40, v47, v54, v61, v68, v75) - - block5: -[Op1jmpb#eb] jump block4 - - block4: -[Op1jmpb#eb] jump block2(v40, v47, v54, v61, v68, v75) - - block10: -[RexOp1pu_id#b8] v43 = iconst.i32 0 -[Op1jmpb#eb] jump block2(v43, v5, v0, v1, v2, v3) - - block2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32): -[RexOp1pu_id#b8] v44 = iconst.i32 0 -[RexOp1tjccb#74] brz v44, block12 -[Op1jmpb#eb] jump block18 - - block18: -[RexOp1pu_id#b8] v50 = iconst.i32 11 -[RexOp1tjccb#74] brz v50, block14 -[Op1jmpb#eb] jump block19 - - block19: -[RexOp1umr#89] v82 = uextend.i64 v52 -[RexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v84 = load.i64 v83 -[RexOp1rr#8001] v57 = iadd v84, v82 -[RexOp1ld#8b] v58 = load.i32 v57 -[RexOp1umr#89] v85 = uextend.i64 v58 -[RexOp1r_ib#8083] v86 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v87 = load.i64 v86 -[RexOp1rr#8001] v64 = iadd v87, v85 -[RexOp1st#88] istore8 v59, v64 -[RexOp1pu_id#b8] v65 = iconst.i32 0 -[Op1jmpb#eb] jump block13(v65) - - block14: -[Op1jmpb#eb] jump block13(v66) - - block13(v51: i32): -[RexOp1umr#89] v88 = uextend.i64 v45 -[RexOp1r_ib#8083] v89 = iadd_imm.i64 v4, 0 -[RexOp1ld#808b] v90 = load.i64 v89 -[RexOp1rr#8001] v71 = iadd v90, v88 -[RexOp1st#89] store v51, v71 -[Op1jmpb#eb] jump block12 - - block12: -[Op1jmpb#eb] jump block11 - - block11: -[Op1jmpb#eb] jump block1 - - block1: -[Op1ret#c3] return -} diff --git a/cranelift/filetests/filetests/regalloc/constraints.clif b/cranelift/filetests/filetests/regalloc/constraints.clif deleted file mode 100644 index 60cd731ed8..0000000000 --- a/cranelift/filetests/filetests/regalloc/constraints.clif +++ /dev/null @@ -1,82 +0,0 @@ -test regalloc -target i686 - -; regex: V=v\d+ -; regex: REG=%r([abcd]x|[sd]i) - -; Tied operands, both are killed at instruction. 
-function %tied_easy() -> i32 { -block0: - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; not: copy - ; check: isub - v2 = isub v0, v1 - return v2 -} - -; Tied operand is live after instruction. -function %tied_alive() -> i32 { -block0: - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; check: $(v0c=$V) = copy v0 - ; check: v2 = isub $v0c, v1 - v2 = isub v0, v1 - ; check: v3 = iadd v2, v0 - v3 = iadd v2, v0 - return v3 -} - -; Fixed register constraint. -function %fixed_op() -> i32 { -block0: - ; check: ,%rax] - ; sameln: v0 = iconst.i32 12 - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; The dynamic shift amount must be in %rcx - ; check: regmove v0, %rax -> %rcx - v2 = ishl v1, v0 - return v2 -} - -; Fixed register constraint twice. -function %fixed_op_twice() -> i32 { -block0: - ; check: ,%rax] - ; sameln: v0 = iconst.i32 12 - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; The dynamic shift amount must be in %rcx - ; check: regmove v0, %rax -> %rcx - v2 = ishl v1, v0 - ; check: regmove v0, %rcx -> $REG - ; check: regmove v2, $REG -> %rcx - v3 = ishl v0, v2 - - return v3 -} - -; Tied use of a diverted register. -function %fixed_op_twice() -> i32 { -block0: - ; check: ,%rax] - ; sameln: v0 = iconst.i32 12 - v0 = iconst.i32 12 - v1 = iconst.i32 13 - ; The dynamic shift amount must be in %rcx - ; check: regmove v0, %rax -> %rcx - ; check: v2 = ishl v1, v0 - v2 = ishl v1, v0 - - ; Now v0 is globally allocated to %rax, but diverted to %rcx. - ; Check that the tied def gets the diverted register. - v3 = isub v0, v2 - ; not: regmove - ; check: ,%rcx] - ; sameln: isub - ; Move it into place for the return value. 
- ; check: regmove v3, %rcx -> %rax - return v3 -} diff --git a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif b/cranelift/filetests/filetests/regalloc/fallthrough-return.clif deleted file mode 100644 index 90650aa4f0..0000000000 --- a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif +++ /dev/null @@ -1,23 +0,0 @@ -test regalloc -target x86_64 legacy - -; Test that fallthrough returns are visited by reload and coloring. - -function %foo() -> f64 { - fn0 = %bar() - -block0: - v0 = f64const 0.0 - call fn0() - fallthrough_return v0 -} -; check: fill v0 - -function %foo() -> f64 { - fn0 = %bar() -> f64, f64 - -block0: - v0, v1 = call fn0() - fallthrough_return v1 -} -; check: regmove v1, %xmm1 -> %xmm0 diff --git a/cranelift/filetests/filetests/regalloc/ghost-param.clif b/cranelift/filetests/filetests/regalloc/ghost-param.clif deleted file mode 100644 index 1d569727dd..0000000000 --- a/cranelift/filetests/filetests/regalloc/ghost-param.clif +++ /dev/null @@ -1,45 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; This test case would create a block parameter that was a ghost value. -; The coalescer would insert a copy of the ghost value, leading to verifier errors. -; -; We don't allow block parameters to be ghost values any longer. -; -; Test case by binaryen fuzzer! 
- -function %pr215(i64 vmctx [%rdi]) system_v { -block0(v0: i64): - v10 = iconst.i64 0 - v1 = bitcast.f64 v10 - jump block5(v1) - -block5(v9: f64): - v11 = iconst.i64 0xffff_ffff_ff9a_421a - v4 = bitcast.f64 v11 - v6 = iconst.i32 0 - v7 = iconst.i32 1 - brnz v7, block4(v6) - jump block8 - -block8: - v8 = iconst.i32 0 - jump block7(v8) - -block7(v5: i32): - brnz v5, block3(v4) - jump block5(v4) - -block4(v3: i32): - brnz v3, block2 - jump block3(v9) - -block3(v2: f64): - jump block2 - -block2: - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/regalloc/global-constraints.clif b/cranelift/filetests/filetests/regalloc/global-constraints.clif deleted file mode 100644 index 1fe89ae823..0000000000 --- a/cranelift/filetests/filetests/regalloc/global-constraints.clif +++ /dev/null @@ -1,30 +0,0 @@ -test regalloc -target i686 legacy - -; This test covers the troubles when values with global live ranges are defined -; by instructions with constrained register classes. -; -; The icmp_imm instrutions write their b1 result to the ABCD register class on -; 32-bit x86. So if we define 5 live values, they can't all fit. -function %global_constraints(i32) { -block0(v0: i32): - v1 = icmp_imm eq v0, 1 - v2 = icmp_imm ugt v0, 2 - v3 = icmp_imm sle v0, 3 - v4 = icmp_imm ne v0, 4 - v5 = icmp_imm sge v0, 5 - brnz v5, block1 - jump block2 - -block2: - return - -block1: - ; Make sure v1-v5 are live in. 
- v10 = band v1, v2 - v11 = bor v3, v4 - v12 = bor v10, v11 - v13 = bor v12, v5 - trapnz v13, user0 - return -} diff --git a/cranelift/filetests/filetests/regalloc/global-fixed.clif b/cranelift/filetests/filetests/regalloc/global-fixed.clif deleted file mode 100644 index 6d31f7511a..0000000000 --- a/cranelift/filetests/filetests/regalloc/global-fixed.clif +++ /dev/null @@ -1,16 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %foo() system_v { -block4: - v3 = iconst.i32 0 - jump block3 - -block3: - v9 = udiv v3, v3 - jump block1 - -block1: - v19 = iadd.i32 v9, v9 - jump block3 -} diff --git a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif b/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif deleted file mode 100644 index c4534b0f8b..0000000000 --- a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif +++ /dev/null @@ -1,44 +0,0 @@ -test regalloc -target x86_64 legacy - -function u0:587() fast { -block0: - v97 = iconst.i32 0 - v169 = iconst.i32 0 - v1729 = iconst.i32 0 - jump block100(v97, v97, v97, v97, v97) - -block100(v1758: i32, v1784: i32, v1845: i32, v1856: i32, v1870: i32): - v1762 = iconst.i32 0 - v1769 = iconst.i32 0 - v1774 = iconst.i32 0 - v1864 = iconst.i32 0 - v1897 = iconst.i32 0 - jump block102(v1774, v1784, v1845, v1856, v1870, v1758, v1762, v169, v1729, v97, v169, v169, v169, v169) - -block102(v1785: i32, v1789: i32, v1843: i32, v1854: i32, v1868: i32, v1882: i32, v1890: i32, v1901: i32, v1921: i32, v1933: i32, v2058: i32, v2124: i32, v2236: i32, v2366: i32): - v1929 = iconst.i32 0 - v1943 = iconst.i32 0 - v1949 = iconst.i32 0 - jump block123(v1897, v1769) - -block123(v1950: i32, v1979: i32): - v1955 = iconst.i32 0 - brz v1955, block125 - jump block122(v1929, v1843, v1864, v2058, v1882, v1897, v1943, v1868, v2124, v1901) - -block125: - v1961 = iadd_imm.i32 v1949, 0 - v1952 = iconst.i32 0 - v1962 = iconst.i64 0 - v1963 = load.i32 v1962 - brz v1963, block123(v1952, v1961) - jump block127 - 
-block127: - v1966 = iconst.i32 0 - jump block122(v1963, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966) - -block122(v1967: i32, v1971: i32, v1972: i32, v1978: i32, v2032: i32, v2041: i32, v2053: i32, v2076: i32, v2085: i32, v2096: i32): - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/infinite-interference.clif b/cranelift/filetests/filetests/regalloc/infinite-interference.clif deleted file mode 100644 index b7a7736405..0000000000 --- a/cranelift/filetests/filetests/regalloc/infinite-interference.clif +++ /dev/null @@ -1,37 +0,0 @@ -test regalloc -target riscv32 - -; Here, the coalescer initially builds vreg0 = [v1, v2, v3] -; -; There's interference between v1 and v2 at the brz instruction. Isolating v2 is not going to -; resolve that conflict since v1 will just interfere with the inserted copy too. - -;function %c1(i32) -> i32 { -;block0(v0: i32): -; v1 = iadd_imm v0, 1 -; v2 = iconst.i32 1 -; brz v1, block1(v2) -; jump block2 -; -;block1(v3: i32): -; return v3 -; -;block2: -; jump block1(v1) -;} - -; Same thing with v1 and v2 swapped to reverse the order of definitions. 
- -function %c2(i32) -> i32 { -block0(v0: i32): - v1 = iadd_imm v0, 1 - v2 = iconst.i32 1 - brz v2, block1(v1) - jump block2 - -block1(v3: i32): - return v3 - -block2: - jump block1(v2) -} diff --git a/cranelift/filetests/filetests/regalloc/iterate.clif b/cranelift/filetests/filetests/regalloc/iterate.clif deleted file mode 100644 index 3272199bca..0000000000 --- a/cranelift/filetests/filetests/regalloc/iterate.clif +++ /dev/null @@ -1,164 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function u0:9(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { -block0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64): - v32 = iconst.i32 0 - v6 = bitcast.f32 v32 - v7 = iconst.i64 0 - v33 = iconst.i64 0 - v8 = bitcast.f64 v33 - v34 = iconst.i32 0xbe99_999a - v9 = bitcast.f32 v34 - v10 = iconst.i32 40 - v11 = iconst.i32 -7 - v35 = iconst.i32 0x40b0_0000 - v12 = bitcast.f32 v35 - v13 = iconst.i64 6 - v36 = iconst.i64 0x4020_0000_0000_0000 - v14 = bitcast.f64 v36 - v44 = iconst.i64 0 - v37 = icmp slt v0, v44 - brnz v37, block2 - jump block11 - -block11: - v38 = fcvt_from_sint.f64 v0 - jump block3(v38) - -block2: - v45 = iconst.i32 1 - v39 = ushr.i64 v0, v45 - v40 = band_imm.i64 v0, 1 - v41 = bor v39, v40 - v42 = fcvt_from_sint.f64 v41 - v43 = fadd v42, v42 - jump block3(v43) - -block3(v15: f64): - v16 = fpromote.f64 v9 - v46 = uextend.i64 v10 - v17 = fcvt_from_sint.f64 v46 - v18 = fcvt_from_sint.f64 v11 - v19 = fpromote.f64 v12 - v54 = iconst.i64 0 - v47 = icmp.i64 slt v13, v54 - brnz v47, block4 - jump block12 - -block12: - v48 = fcvt_from_sint.f64 v13 - jump block5(v48) - -block4: - v55 = iconst.i32 1 - v49 = ushr.i64 v13, v55 - v50 = band_imm.i64 v13, 1 - v51 = bor v49, v50 - v52 = fcvt_from_sint.f64 v51 - v53 = fadd v52, v52 - jump block5(v53) - -block5(v20: f64): - v63 = iconst.i64 0 - v56 = icmp.i64 slt v7, v63 - brnz v56, block6 - jump block13 - -block13: - v57 = fcvt_from_sint.f64 v7 - jump 
block7(v57) - -block6: - v64 = iconst.i32 1 - v58 = ushr.i64 v7, v64 - v59 = band_imm.i64 v7, 1 - v60 = bor v58, v59 - v61 = fcvt_from_sint.f64 v60 - v62 = fadd v61, v61 - jump block7(v62) - -block7(v21: f64): - v22 = fadd v21, v14 - v23 = fadd.f64 v20, v22 - v24 = fadd.f64 v19, v23 - v25 = fadd.f64 v18, v24 - v26 = fadd.f64 v17, v25 - v27 = fadd.f64 v2, v26 - v28 = fadd.f64 v16, v27 - v29 = fadd.f64 v15, v28 - v30 = x86_cvtt2si.i64 v29 - v69 = iconst.i64 0x8000_0000_0000_0000 - v65 = icmp ne v30, v69 - brnz v65, block8 - jump block15 - -block15: - v66 = fcmp uno v29, v29 - brz v66, block9 - jump block16 - -block16: - trap bad_toint - -block9: - v70 = iconst.i64 0xc3e0_0000_0000_0000 - v67 = bitcast.f64 v70 - v68 = fcmp gt v67, v29 - brz v68, block10 - jump block17 - -block17: - trap int_ovf - -block10: - jump block8 - -block8: - jump block1(v30) - -block1(v31: i64): - return v31 -} - -function u0:26(i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { - gv1 = vmctx - gv0 = iadd_imm.i64 gv1, 48 - sig0 = (i32 [%rdi], i64 [%rsi], i64 vmctx [%r14], i64 sigid [%rbx]) -> i64 [%rax] baldrdash_system_v - -block0(v0: i64): - v1 = iconst.i32 32 - v2 = iconst.i64 64 - v3 = iconst.i32 9 - v4 = iconst.i64 1063 - v5 = iadd_imm v0, 48 - v6 = load.i32 v5 - v7 = icmp uge v3, v6 - ; If we're unlucky, there are no ABCD registers available for v7 at this branch. 
- brz v7, block2 - jump block4 - -block4: - trap heap_oob - -block2: - v8 = load.i64 v5+8 - v9 = uextend.i64 v3 - v16 = iconst.i64 16 - v10 = imul v9, v16 - v11 = iadd v8, v10 - v12 = load.i64 v11 - brnz v12, block3 - jump block5 - -block5: - trap icall_null - -block3: - v13 = load.i64 v11+8 - v14 = call_indirect.i64 sig0, v12(v1, v2, v13, v4) - jump block1(v14) - -block1(v15: i64): - return v15 -} diff --git a/cranelift/filetests/filetests/regalloc/multi-constraints.clif b/cranelift/filetests/filetests/regalloc/multi-constraints.clif deleted file mode 100644 index 0a6b160f09..0000000000 --- a/cranelift/filetests/filetests/regalloc/multi-constraints.clif +++ /dev/null @@ -1,51 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; Test combinations of constraints. -; -; The x86 ushr instruction requires its second operand to be passed in %rcx and its output is -; tied to the first input operand. -; -; If we pass the same value to both operands, both constraints must be satisfied. - -; Found by the Binaryen fuzzer in PR221. -; -; Conditions triggering the problem: -; -; - The same value used for a tied operand and a fixed operand. -; - The common value is already in %rcx. -; - The tied output value is live outside the block. -; -; Under these conditions, Solver::add_tied_input() would create a variable for the tied input -; without considering the fixed constraint. -function %pr221(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] { -block0(v0: i64, v1: i64, v2: i64, v3: i64): - v4 = ushr v3, v3 - jump block1 - -block1: - return v4 -} - -; Found by the Binaryen fuzzer in PR218. -; -; This is a similar situation involving combined constraints on the ushr instruction: -; -; - The %rcx register is already in use by a globally live value. -; - The ushr x, x result is also a globally live value. 
-; -; Since the ushr x, x result is forced to be placed in %rcx, we must set the replace_global_defines -; flag so it can be reassigned to a different global register. -function %pr218(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] { -block0(v0: i64, v1: i64, v2: i64, v3: i64): - ; check: regmove v3, %rcx -> - v4 = ushr v0, v0 - ; check: v4 = copy - jump block1 - -block1: - ; v3 is globally live in %rcx. - ; v4 is also globally live. Needs to be assigned something else for the trip across the CFG edge. - v5 = iadd v3, v4 - return v5 -} diff --git a/cranelift/filetests/filetests/regalloc/multiple-returns.clif b/cranelift/filetests/filetests/regalloc/multiple-returns.clif deleted file mode 100644 index 8825a4df72..0000000000 --- a/cranelift/filetests/filetests/regalloc/multiple-returns.clif +++ /dev/null @@ -1,23 +0,0 @@ -test regalloc -target x86_64 legacy - -; Return the same value twice. This needs a copy so that each value can be -; allocated its own register. -function %multiple_returns() -> i64, i64 { -block0: - v2 = iconst.i64 0 - return v2, v2 -} -; check: v2 = iconst.i64 0 -; check: v3 = copy v2 -; check: return v2, v3 - -; Same thing, now with a fallthrough_return. -function %multiple_returns() -> i64, i64 { -block0: - v2 = iconst.i64 0 - fallthrough_return v2, v2 -} -; check: v2 = iconst.i64 0 -; check: v3 = copy v2 -; check: fallthrough_return v2, v3 diff --git a/cranelift/filetests/filetests/regalloc/output-interference.clif b/cranelift/filetests/filetests/regalloc/output-interference.clif deleted file mode 100644 index 1ba797f6c8..0000000000 --- a/cranelift/filetests/filetests/regalloc/output-interference.clif +++ /dev/null @@ -1,14 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -function %test(i64) -> i64 system_v { -block0(v0: i64): - v2 = iconst.i64 12 - ; This division clobbers two of its fixed input registers on x86. - ; These are FixedTied constraints that the spiller needs to resolve. 
- v5 = udiv v0, v2 - v6 = iconst.i64 13 - v9 = udiv v0, v6 - v10 = iadd v5, v9 - return v10 -} diff --git a/cranelift/filetests/filetests/regalloc/reload-208.clif b/cranelift/filetests/filetests/regalloc/reload-208.clif deleted file mode 100644 index 5e6a7e9864..0000000000 --- a/cranelift/filetests/filetests/regalloc/reload-208.clif +++ /dev/null @@ -1,112 +0,0 @@ -test regalloc -target x86_64 legacy haswell - -; regex: V=v\d+ -; regex: BB=block\d+ - -; Filed as https://github.com/bytecodealliance/cranelift/issues/208 -; -; The verifier complains about a branch argument that is not in the same virtual register as the -; corresponding block argument. -; -; The problem was the reload pass rewriting block arguments on "brnz v9, block3(v9)" - -function %pr208(i64 vmctx [%rdi]) system_v { - gv1 = vmctx - gv0 = iadd_imm.i64 gv1, -8 - heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000 - sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v - sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v - fn0 = u0:1 sig0 - fn1 = u0:3 sig1 - -block0(v0: i64): - v1 = iconst.i32 0 - v2 = call fn0(v0) - v20 = iconst.i32 0x4ffe - v16 = icmp uge v2, v20 - brz v16, block5 - jump block9 - -block9: - trap heap_oob - -block5: - v17 = uextend.i64 v2 - v18 = iadd_imm.i64 v0, -8 - v19 = load.i64 v18 - v3 = iadd v19, v17 - v4 = load.i32 v3 - v21 = iconst.i32 0 - v5 = icmp eq v4, v21 - v6 = bint.i32 v5 - brnz v6, block2 - jump block3(v4) - - ; check: block5: - ; check: jump block3(v4) - ; check: $(splitEdge=$BB): - ; nextln: jump block3(v9) - -block3(v7: i32): - call fn1(v0, v7) - v26 = iconst.i32 0x4ffe - v22 = icmp uge v7, v26 - brz v22, block6 - jump block10 - -block10: - trap heap_oob - -block6: - v23 = uextend.i64 v7 - v24 = iadd_imm.i64 v0, -8 - v25 = load.i64 v24 - v8 = iadd v25, v23 - v9 = load.i32 v8+56 - ; check: v9 = spill - ; check: brnz $V, $splitEdge - brnz v9, block3(v9) - jump block4 - -block4: - jump block2 - -block2: - v10 = iconst.i32 0 - v31 = iconst.i32 0x4ffe - v27 = 
icmp uge v10, v31 - brz v27, block7 - jump block11 - -block11: - trap heap_oob - -block7: - v28 = uextend.i64 v10 - v29 = iadd_imm.i64 v0, -8 - v30 = load.i64 v29 - v11 = iadd v30, v28 - v12 = load.i32 v11+12 - call fn1(v0, v12) - v13 = iconst.i32 0 - v36 = iconst.i32 0x4ffe - v32 = icmp uge v13, v36 - brz v32, block8 - jump block12 - -block12: - trap heap_oob - -block8: - v33 = uextend.i64 v13 - v34 = iadd_imm.i64 v0, -8 - v35 = load.i64 v34 - v14 = iadd v35, v33 - v15 = load.i32 v14+12 - call fn1(v0, v15) - jump block1 - -block1: - return -} diff --git a/cranelift/filetests/filetests/regalloc/reload-779.clif b/cranelift/filetests/filetests/regalloc/reload-779.clif deleted file mode 100644 index 5dafe32b5c..0000000000 --- a/cranelift/filetests/filetests/regalloc/reload-779.clif +++ /dev/null @@ -1,23 +0,0 @@ -test compile -target x86_64 legacy - -; Filed as https://github.com/bytecodealliance/cranelift/issues/779 -; -; The copy_nop optimisation to reload (see Issue 773) was creating -; copy_nop instructions for types for which there were no encoding. - -function u0:0(i64, i64, i64) system_v { - sig0 = () system_v - sig1 = (i16) system_v - fn1 = u0:94 sig0 - fn2 = u0:95 sig1 - -block0(v0: i64, v1: i64, v2: i64): - v3 = iconst.i16 0 - jump block1(v3) - -block1(v4: i16): - call fn1() - call fn2(v4) - jump block1(v4) -} diff --git a/cranelift/filetests/filetests/regalloc/reload.clif b/cranelift/filetests/filetests/regalloc/reload.clif deleted file mode 100644 index 1ae755a988..0000000000 --- a/cranelift/filetests/filetests/regalloc/reload.clif +++ /dev/null @@ -1,46 +0,0 @@ -test regalloc -target riscv32 legacy enable_e - -; regex: V=v\d+ - -; Check that we can handle a function return value that got spilled. 
-function %spill_return() -> i32 { - fn0 = %foo() -> i32 system_v - -block0: - v0 = call fn0() - ; check: $(reg=$V) = call fn0 - ; check: v0 = spill $reg - v2 = call fn0() - ; check: v2 = call fn0 - return v0 - ; check: $(reload=$V) = fill v0 - ; check: return $reload -} - -; Check that copies where the arg has been spilled are replaced with fills. -; -; RV32E has 6 registers for function arguments so the 7th, v6, will be placed -; on the stack. -function %spilled_copy_arg(i32, i32, i32, i32, i32, i32, i32) -> i32 { - -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): - ; not: copy - ; check: v10 = fill v6 - v10 = copy v6 - return v10 -} - -; Check that copies where the result has been spilled are replaced with spills. -; -; v1 is live across a call so it will be spilled. -function %spilled_copy_result(i32) -> i32 { - fn0 = %foo(i32) - -block0(v0: i32): - ; not: copy - ; check: v1 = spill v0 - v1 = copy v0 - call fn0(v1) - return v1 -} diff --git a/cranelift/filetests/filetests/regalloc/schedule-moves.clif b/cranelift/filetests/filetests/regalloc/schedule-moves.clif deleted file mode 100644 index 701a91a15a..0000000000 --- a/cranelift/filetests/filetests/regalloc/schedule-moves.clif +++ /dev/null @@ -1,39 +0,0 @@ -test regalloc -target i686 legacy haswell - -function %pr165() system_v { -block0: - v0 = iconst.i32 0x0102_0304 - v1 = iconst.i32 0x1102_0304 - v2 = iconst.i32 0x2102_0304 - v20 = ishl v1, v0 - v21 = ishl v2, v0 - v22 = sshr v1, v0 - v23 = sshr v2, v0 - v24 = ushr v1, v0 - v25 = ushr v2, v0 - istore8 v0, v1+0x2710 - istore8 v1, v0+0x2710 - return -} - -; Same as above, but use so many registers that spilling is required. -; Note: This is also a candidate for using xchg instructions. 
-function %emergency_spill() system_v { -block0: - v0 = iconst.i32 0x0102_0304 - v1 = iconst.i32 0x1102_0304 - v2 = iconst.i32 0x2102_0304 - v3 = iconst.i32 0x3102_0304 - v4 = iconst.i32 0x4102_0304 - v20 = ishl v1, v0 - v21 = ishl v2, v3 - v22 = sshr v1, v0 - v23 = sshr v2, v0 - v24 = ushr v1, v0 - v25 = ushr v2, v0 - istore8 v0, v1+0x2710 - istore8 v1, v0+0x2710 - istore8 v3, v4+0x2710 - return -} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif deleted file mode 100644 index b280db086f..0000000000 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif +++ /dev/null @@ -1,100 +0,0 @@ -test compile -set opt_level=speed -set enable_pinned_reg=true -target x86_64 legacy haswell - -function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { -block0(v0: i32, v1: i32, v2: i32, v3: i64): - v236 = iconst.i32 0x4de9_bd37 - v424 = iconst.i32 0 - jump block37(v424) - -block37(v65: i32): - v433 = iconst.i32 0 - jump block40(v433) - -block40(v70: i32): - v75 = iconst.i32 0 - v259 = iconst.i32 0 - v78 -> v259 - v449 = iconst.i32 0 - v450, v451 = x86_sdivmodx v75, v449, v259 - v79 -> v450 - v269 = iconst.i32 0 - v270 = ushr_imm v269, 31 - v271 = iadd v269, v270 - v98 -> v271 - v100 = iconst.i32 -31 - v272 = iconst.i32 0x4de9_bd37 - v490, v273 = x86_smulx v100, v272 - v493 = iconst.i32 0 - jump block61(v493) - -block61(v103: i32): - v104 = iconst.i32 -23 - v105 = iconst.i32 -23 - v106 = popcnt v105 - v500 = sshr_imm v104, 31 - v501 = iconst.i32 0 - jump block64(v501) - -block64(v107: i32): - v108 = iconst.i32 0 - v109 = iconst.i32 0 - v278 = iconst.i32 0 - v507, v279 = x86_smulx v109, v278 - v280 = isub v279, v109 - v281 = sshr_imm v280, 11 - v282 = iconst.i32 0 - v283 = iadd v281, v282 - v111 -> v283 - v112 = rotr v108, v283 - jump block65 - -block65: - v509 = iconst.i32 0 - v510, v511 = x86_sdivmodx v107, v509, v112 - v113 -> v510 - v114 = 
iconst.i32 0 - v517 = iconst.i32 0 - v518, v519 = x86_sdivmodx v103, v517, v114 - v115 -> v518 - v534 = iconst.i32 0 - v122 -> v534 - v541 = iconst.i32 0 - v542, v543 = x86_sdivmodx v271, v541, v122 - v123 -> v542 - v289 = iconst.i32 0 - v125 -> v289 - v550 = iconst.i32 0 - v551, v552 = x86_sdivmodx v79, v550, v289 - v126 -> v551 - v130 = iconst.i32 0 - v558 = iconst.i32 0 - v559, v560 = x86_sdivmodx v70, v558, v130 - v131 -> v559 - v305 = iconst.i32 0 - v140 -> v305 - v577 = iconst.i32 0 - v578, v579 = x86_sdivmodx v65, v577, v305 - v141 -> v578 - v166 = iconst.i32 0 - v167 = iconst.i32 -31 - v318 = iconst.i32 0x4de9_bd37 - v650, v319 = x86_smulx v167, v318 - v320 = isub v319, v167 - v321 = sshr_imm v320, 4 - v322 = iconst.i32 0 - v323 = iadd v321, v322 - v169 -> v323 - v652 = iconst.i32 0 - v653, v654 = x86_sdivmodx v166, v652, v323 - v170 -> v653 - v171 = iconst.i32 -23 - v172 = iconst.i32 -23 - v173 = popcnt v172 - v174 = popcnt v173 - v660 = sshr_imm v171, 31 - v661, v662 = x86_sdivmodx v171, v660, v174 - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif deleted file mode 100644 index 1c2d1b2bc0..0000000000 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif +++ /dev/null @@ -1,137 +0,0 @@ -test compile -set opt_level=speed -set enable_pinned_reg=true -target x86_64 legacy haswell - -function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { -block0(v0: i32, v1: i32, v2: i32, v3: i64): - v5 = iconst.i32 -8 - v114 = iconst.i32 0 - v16 = iconst.i32 -8 - v17 = popcnt v16 - v192 = ifcmp_imm v17, -1 - trapif ne v192, user0 - jump block12 - -block12: - v122 = iconst.i32 0 - v123 = ushr_imm v122, 31 - v124 = iadd v122, v123 - v20 -> v124 - v25 = iconst.i32 -19 - v204 = iconst.i32 0 - v31 -> v204 - v210 = ifcmp_imm v31, -1 - trapif ne v210, user0 - jump block18 - -block18: - v215 = iconst.i32 0 - jump block19(v215) - 
-block19(v32: i32): - v35 = iconst.i32 0 - v218 = ifcmp_imm v35, -1 - trapif ne v218, user0 - jump block21 - -block21: - v223 = iconst.i32 0 - jump block22(v223) - -block22(v36: i32): - v136 = iconst.i32 0 - v40 -> v136 - v227 = ifcmp_imm v136, -1 - trapif ne v227, user0 - jump block24 - -block24: - v232 = iconst.i32 0 - jump block25(v232) - -block25(v41: i32): - v142 = iconst.i32 0 - v45 -> v142 - v236 = ifcmp_imm v142, -1 - trapif ne v236, user0 - jump block27 - -block27: - v241 = iconst.i32 0 - jump block28(v241) - -block28(v46: i32): - v49 = iconst.i32 0 - v244 = ifcmp_imm v49, -1 - trapif ne v244, user0 - jump block30 - -block30: - v254 = iconst.i32 0 - v53 -> v254 - v54 = iconst.i32 -23 - v55 = popcnt v54 - v143 = iconst.i32 0x4de9_bd37 - v260, v144 = x86_smulx v55, v143 - v145 = iconst.i32 0 - v146 = sshr_imm v145, 4 - v147 = iconst.i32 0 - v148 = iadd v146, v147 - v57 -> v148 - v58 = ishl v53, v148 - jump block35 - -block35: - v262 = iconst.i32 0 - v263, v264 = x86_sdivmodx v46, v262, v58 - v59 -> v263 - v270 = iconst.i32 0 - v271, v272 = x86_sdivmodx v41, v270, v59 - v60 -> v271 - v61 = f32const 0.0 - v280 = iconst.i32 0 - v281 = ffcmp v61, v61 - trapff ord v281, user0 - jump block41(v280) - -block41(v62: i32): - v157 = iconst.i32 0 - v158 = sshr_imm v157, 4 - v159 = iconst.i32 0 - v160 = iadd v158, v159 - v75 -> v160 - v308 = ifcmp_imm v160, -1 - trapif ne v308, user0 - jump block52 - -block52: - v87 = iconst.i32 -23 - v88 = iconst.i32 -23 - v89 = popcnt v88 - v161 = iconst.i32 0x4de9_bd37 - v324, v162 = x86_smulx v89, v161 - v163 = isub v162, v89 - v164 = sshr_imm v163, 4 - v165 = iconst.i32 0 - v166 = iadd v164, v165 - v91 -> v166 - v326 = iconst.i32 0 - v327, v328 = x86_sdivmodx v87, v326, v166 - v92 -> v327 - v351 = iconst.i32 0 - v99 -> v351 - v358 = iconst.i32 0 - v359, v360 = x86_sdivmodx v36, v358, v99 - v100 -> v359 - v102 = iconst.i32 0 - v103 = rotr.i32 v32, v102 - v366 = iconst.i32 0 - v367, v368 = x86_sdivmodx v25, v366, v103 - v104 -> v367 - 
v383 = iconst.i32 0 - v107 -> v383 - v390 = iconst.i32 0 - v391, v392 = x86_sdivmodx v124, v390, v107 - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif deleted file mode 100644 index 1aec10354f..0000000000 --- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif +++ /dev/null @@ -1,173 +0,0 @@ -test compile -set opt_level=speed -set enable_pinned_reg=true -target x86_64 legacy haswell - -;; Test for the issue #1123; https://github.com/bytecodealliance/cranelift/issues/1123 - -function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v { -block0(v0: i32, v1: i32, v2: i32, v3: i64): - v351 = iconst.i32 0x4de9_bd37 - v31 = iconst.i32 -23 - v35 = iconst.i32 0 - v36 = iconst.i32 -31 - v357 = iconst.i32 0x4de9_bd37 - v530, v358 = x86_smulx v36, v357 - v359 = isub v358, v36 - v360 = sshr_imm v359, 4 - v361 = iconst.i32 0 - v362 = iadd v360, v361 - v38 -> v362 - v532 = sshr_imm v35, 31 - v533, v534 = x86_sdivmodx v35, v532, v362 - v39 -> v533 - v53 = iconst.i32 0 - v547 = ifcmp_imm v53, -1 - trapif ne v547, user0 - jump block30 - -block30: - v75 = iconst.i32 0 - v581 = ifcmp_imm v75, -1 - trapif ne v581, user0 - jump block42 - -block42: - v136 = iconst.i32 0 - v691 = ifcmp_imm v136, -1 - trapif ne v691, user0 - jump block81 - -block81: - v158 = iconst.i32 0 - v725 = ifcmp_imm v158, -1 - trapif ne v725, user0 - jump block93 - -block93: - v760 = iconst.i32 0 - jump block106(v760) - -block106(v175: i32): - v179 = iconst.i32 0 - v180 = icmp_imm eq v179, 0 - v183 = iconst.i32 0 - v766 = ifcmp_imm v183, -1 - trapif ne v766, user0 - jump block108 - -block108: - v771 = iconst.i32 0 - jump block109(v771) - -block109(v184: i32): - v785 = iconst.i32 0 - v193 -> v785 - v791 = ifcmp_imm v193, -1 - trapif ne v791, user0 - jump block117 - -block117: - v796 = iconst.i32 0 - jump block118(v796) - -block118(v194: i32): - v203 = iconst.i32 -63 - v809 = 
iconst.i32 0 - v207 -> v809 - v815 = ifcmp_imm v207, -1 - trapif ne v815, user0 - jump block126 - -block126: - v209 = iconst.i32 0 - v823 = ifcmp_imm v209, -1 - trapif ne v823, user0 - jump block129 - -block129: - v213 = iconst.i32 -23 - v214 = iconst.i32 -19 - v215 = icmp_imm eq v214, 0 - v216 = bint.i32 v215 - v217 = popcnt v216 - v435 = iconst.i32 0x7df7_df7d - v831, v436 = x86_smulx v217, v435 - v437 = isub v436, v217 - v438 = sshr_imm v437, 5 - v439 = ushr_imm v438, 31 - v440 = iadd v438, v439 - v219 -> v440 - v220 = rotr v213, v440 - v229 = iconst.i32 0 - v841 = iconst.i32 0 - v842, v843 = x86_sdivmodx v194, v841, v229 - v230 -> v842 - v849 = iconst.i32 0 - v850, v851 = x86_sdivmodx v184, v849, v230 - v231 -> v850 - v232 = iconst.i32 0 - v857 = iconst.i32 0 - v858, v859 = x86_sdivmodx v175, v857, v232 - v233 -> v858 - v915 = iconst.i32 0 - jump block163(v915) - -block163(v253: i32): - v255 = iconst.i32 0 - v256 = iconst.i32 -23 - v257 = iconst.i32 -19 - v258 = icmp_imm eq v257, 0 - v259 = bint.i32 v258 - v260 = popcnt v259 - v447 = iconst.i32 0x7df7_df7d - v921, v448 = x86_smulx v260, v447 - v449 = isub v448, v260 - v450 = sshr_imm v449, 5 - v451 = ushr_imm v450, 31 - v452 = iadd v450, v451 - v262 -> v452 - v263 = rotr v256, v452 - v264 = popcnt v263 - v265 = popcnt v264 - v266 = popcnt v265 - v267 = rotr v255, v266 - v268 = popcnt v267 - v923 = iconst.i32 0 - v924, v925 = x86_sdivmodx v253, v923, v268 - v269 -> v924 - v276 = iconst.i32 0 - v277 = iconst.i32 -63 - v278 = popcnt v277 - v947 = iconst.i32 0 - v948, v949 = x86_sdivmodx v276, v947, v278 - v279 -> v948 - v309 = iconst.i32 0 - v310 = iconst.i32 0 - v311 = iconst.i32 0 - v312 = icmp_imm eq v311, 0 - v313 = bint.i32 v312 - v314 = rotr v310, v313 - v315 = iconst.i32 -31 - v464 = iconst.i32 0 - v1020, v465 = x86_smulx v315, v464 - v466 = isub v465, v315 - v467 = sshr_imm v466, 4 - v468 = iconst.i32 0 - v469 = iadd v467, v468 - v317 -> v469 - v1022 = iconst.i32 0 - v1023, v1024 = x86_sdivmodx v314, 
v1022, v469 - v318 -> v1023 - v320 = iconst.i32 0 - v321 = iconst.i32 -19 - v322 = popcnt v321 - v1030 = iconst.i32 0 - v1031, v1032 = x86_sdivmodx v320, v1030, v322 - v323 -> v1031 - v1047 = iconst.i32 0 - v325 -> v1047 - v1054 = sshr_imm v309, 31 - v1055, v1056 = x86_sdivmodx v309, v1054, v325 - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/spill-noregs.clif b/cranelift/filetests/filetests/regalloc/spill-noregs.clif deleted file mode 100644 index e3540f6a59..0000000000 --- a/cranelift/filetests/filetests/regalloc/spill-noregs.clif +++ /dev/null @@ -1,175 +0,0 @@ -test regalloc -target x86_64 legacy - -; Test case found by the Binaryen fuzzer. -; -; The spiller panics with a -; 'Ran out of GPR registers when inserting copy before v68 = icmp.i32 eq v66, v67', -; cranelift-codegen/src/regalloc/spilling.rs:425:28 message. -; -; The process_reg_uses() function is trying to insert a copy before the icmp instruction in block4 -; and runs out of registers to spill. Note that block7 has a lot of dead parameter values. -; -; The spiller was not releasing register pressure for dead block parameters. 
- -function %pr223(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v { -block0(v0: i32, v1: i64): - v2 = iconst.i32 0 - v3 = iconst.i64 0 - v4 = iconst.i32 0xffff_ffff_bb3f_4a2c - brz v4, block5 - jump block1 - -block1: - v5 = iconst.i32 0 - v6 = copy.i64 v3 - v7 = copy.i64 v3 - v8 = copy.i64 v3 - v9 = copy.i64 v3 - v10 = copy.i64 v3 - v11 = copy.i64 v3 - v12 = copy.i64 v3 - v13 = copy.i64 v3 - v14 = copy.i64 v3 - v15 = copy.i64 v3 - v16 = copy.i64 v3 - brnz v5, block4(v2, v3, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) - jump block2 - -block2: - v17 = iconst.i32 0 - v18 = copy.i64 v3 - v19 = copy.i64 v3 - v20 = copy.i64 v3 - v21 = copy.i64 v3 - v22 = copy.i64 v3 - v23 = copy.i64 v3 - v24 = copy.i64 v3 - v25 = copy.i64 v3 - v26 = copy.i64 v3 - v27 = copy.i64 v3 - v28 = copy.i64 v3 - brnz v17, block4(v2, v3, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) - jump block3 - -block3: - jump block1 - -block4(v29: i32, v30: i64, v31: i64, v32: i64, v33: i64, v34: i64, v35: i64, v36: i64, v37: i64, v38: i64, v39: i64, v40: i64, v41: i64): - jump block7(v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) - -block5: - jump block6 - -block6: - v42 = copy.i64 v3 - v43 = copy.i64 v3 - v44 = copy.i64 v3 - v45 = copy.i64 v3 - v46 = copy.i64 v3 - v47 = copy.i64 v3 - v48 = copy.i64 v3 - v49 = copy.i64 v3 - v50 = copy.i64 v3 - v51 = copy.i64 v3 - v52 = copy.i64 v3 - jump block7(v2, v3, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) - -block7(v53: i32, v54: i64, v55: i64, v56: i64, v57: i64, v58: i64, v59: i64, v60: i64, v61: i64, v62: i64, v63: i64, v64: i64, v65: i64): - v66 = iconst.i32 0 - v67 = iconst.i32 0 - v68 = icmp eq v66, v67 - v69 = bint.i32 v68 - jump block8 - -block8: - jump block9 - -block9: - v70 = iconst.i32 0xffff_ffff_ffff_912f - brz v70, block10 - jump block35 - -block10: - v71 = iconst.i32 0 - brz v71, block11 - jump block27 - -block11: - jump block12 - -block12: - jump block13 - -block13: - jump block14 - -block14: - jump 
block15 - -block15: - jump block16 - -block16: - jump block17 - -block17: - jump block18 - -block18: - jump block19 - -block19: - jump block20 - -block20: - jump block21 - -block21: - jump block22 - -block22: - jump block23 - -block23: - jump block24 - -block24: - jump block25 - -block25: - jump block26 - -block26: - jump block27 - -block27: - jump block28 - -block28: - jump block29 - -block29: - jump block30 - -block30: - jump block31 - -block31: - jump block32 - -block32: - jump block33 - -block33: - jump block34 - -block34: - jump block35 - -block35: - jump block36 - -block36: - trap user0 -} diff --git a/cranelift/filetests/filetests/regalloc/spill.clif b/cranelift/filetests/filetests/regalloc/spill.clif deleted file mode 100644 index 2a3f2ad959..0000000000 --- a/cranelift/filetests/filetests/regalloc/spill.clif +++ /dev/null @@ -1,223 +0,0 @@ -test regalloc - -; Test the spiler on an ISA with few registers. -; RV32E has 16 registers, where: -; - %x0 is hardwired to zero. -; - %x1 is the return address. -; - %x2 is the stack pointer. -; - %x3 is the global pointer. -; - %x4 is the thread pointer. -; - %x10-%x15 are function arguments. -; -; regex: V=v\d+ -; regex: WS=\s+ - -target riscv32 legacy enable_e - -; In straight-line code, the first value defined is spilled. -; That is in order: -; 1. The argument v1. -; 2. The link register. -; 3. 
The first computed value, v2 -function %pyramid(i32) -> i32 { -; check: ss0 = spill_slot 4 -; check: ss1 = spill_slot 4 -; check: ss2 = spill_slot 4 -; not: spill_slot -block0(v1: i32): -; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1]) - ; check: ,ss0]$WS v1 = spill $rv1 - ; nextln: ,ss1]$WS $(link=$V) = spill $rlink - ; not: spill - v2 = iadd_imm v1, 12 - ; check: $(r1v2=$V) = iadd_imm - ; nextln: ,ss2]$WS v2 = spill $r1v2 - ; not: spill - v3 = iadd_imm v2, 12 - v4 = iadd_imm v3, 12 - v5 = iadd_imm v4, 12 - v6 = iadd_imm v5, 12 - v7 = iadd_imm v6, 12 - v8 = iadd_imm v7, 12 - v9 = iadd_imm v8, 12 - v10 = iadd_imm v9, 12 - v11 = iadd_imm v10, 12 - v12 = iadd_imm v11, 12 - v13 = iadd_imm v12, 12 - v14 = iadd_imm v13, 12 - v33 = iadd v13, v14 - ; check: iadd v13 - v32 = iadd v33, v12 - v31 = iadd v32, v11 - v30 = iadd v31, v10 - v29 = iadd v30, v9 - v28 = iadd v29, v8 - v27 = iadd v28, v7 - v26 = iadd v27, v6 - v25 = iadd v26, v5 - v24 = iadd v25, v4 - v23 = iadd v24, v3 - v22 = iadd v23, v2 - ; check: $(r2v2=$V) = fill v2 - ; check: v22 = iadd v23, $r2v2 - v21 = iadd v22, v1 - ; check: $(r2v1=$V) = fill v1 - ; check: v21 = iadd v22, $r2v1 - ; check: $(rlink2=$V) = fill $link - return v21 - ; check: return v21, $rlink2 -} - -; All values live across a call must be spilled -function %across_call(i32) { - fn0 = %foo(i32) -block0(v1: i32): - ; check: v1 = spill - call fn0(v1) - ; check: call fn0 - call fn0(v1) - ; check: fill v1 - ; check: call fn0 - return -} - -; The same value used for two function arguments. -function %doubleuse(i32) { - fn0 = %xx(i32, i32) -block0(v0: i32): - ; check: $(c=$V) = copy v0 - call fn0(v0, v0) - ; check: call fn0(v0, $c) - return -} - -; The same value used as indirect callee and argument. -function %doubleuse_icall1(i32) { - sig0 = (i32) system_v -block0(v0: i32): - ; not:copy - call_indirect sig0, v0(v0) - return -} - -; The same value used as indirect callee and two arguments. 
-function %doubleuse_icall2(i32) { - sig0 = (i32, i32) system_v -block0(v0: i32): - ; check: $(c=$V) = copy v0 - call_indirect sig0, v0(v0, v0) - ; check: call_indirect sig0, v0(v0, $c) - return -} - -; Two arguments on the stack. -function %stackargs(i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { -; check: ss0 = incoming_arg 4 -; check: ss1 = incoming_arg 4, offset 4 -; not: incoming_arg -block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32): - ; unordered: fill v6 - ; unordered: fill v7 - v10 = iadd v6, v7 - return v10 -} - -; More block arguments than registers. -function %blockargs(i32) -> i32 { -block0(v1: i32): - ; check: v1 = spill - v2 = iconst.i32 1 - jump block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2) - -block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32): - v22 = iadd v10, v11 - v23 = iadd v22, v12 - v24 = iadd v23, v13 - v25 = iadd v24, v14 - v26 = iadd v25, v15 - v27 = iadd v26, v16 - v28 = iadd v27, v17 - v29 = iadd v28, v18 - v30 = iadd v29, v19 - v31 = iadd v30, v20 - v32 = iadd v31, v21 - v33 = iadd v32, v1 - return v33 -} - -; Spilling a block argument to make room for a branch operand. -function %brargs(i32) -> i32 { -block0(v1: i32): - ; check: v1 = spill - v2 = iconst.i32 1 - brnz v1, block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2) - jump block2 - -block2: - return v1 - -block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32): - v22 = iadd v10, v11 - v23 = iadd v22, v12 - v24 = iadd v23, v13 - v25 = iadd v24, v14 - v26 = iadd v25, v15 - v27 = iadd v26, v16 - v28 = iadd v27, v17 - v29 = iadd v28, v18 - v30 = iadd v29, v19 - v31 = iadd v30, v20 - v32 = iadd v31, v21 - v33 = iadd v32, v1 - return v33 -} - -; In straight-line code, the first value defined is spilled. -; That is in order: -; 1. The argument v1. -; 2. The link register. -; 3. 
The first computed value, v2 -function %use_spilled_value(i32) -> i32 { -; check: ss0 = spill_slot 4 -; check: ss1 = spill_slot 4 -; check: ss2 = spill_slot 4 -block0(v1: i32): -; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1]) - ; check: ,ss0]$WS v1 = spill $rv1 - ; nextln: ,ss1]$WS $(link=$V) = spill $rlink - ; not: spill - v2 = iadd_imm v1, 12 - ; check: $(r1v2=$V) = iadd_imm - ; nextln: ,ss2]$WS v2 = spill $r1v2 - v3 = iadd_imm v2, 12 - v4 = iadd_imm v3, 12 - v5 = iadd_imm v4, 12 - v6 = iadd_imm v5, 12 - v7 = iadd_imm v6, 12 - v8 = iadd_imm v7, 12 - v9 = iadd_imm v8, 12 - v10 = iadd_imm v9, 12 - v11 = iadd_imm v10, 12 - v12 = iadd_imm v11, 12 - v13 = iadd_imm v12, 12 - v14 = iadd_imm v13, 12 - - ; Here we have maximum register pressure, and v2 has been spilled. - ; What happens if we use it? - v33 = iadd v2, v14 - v32 = iadd v33, v12 - v31 = iadd v32, v11 - v30 = iadd v31, v10 - v29 = iadd v30, v9 - v28 = iadd v29, v8 - v27 = iadd v28, v7 - v26 = iadd v27, v6 - v25 = iadd v26, v5 - v24 = iadd v25, v4 - v23 = iadd v24, v3 - v22 = iadd v23, v2 - v21 = iadd v22, v1 - v20 = iadd v21, v13 - v19 = iadd v20, v2 - return v21 -} diff --git a/cranelift/filetests/filetests/regalloc/unreachable_code.clif b/cranelift/filetests/filetests/regalloc/unreachable_code.clif deleted file mode 100644 index 219a299880..0000000000 --- a/cranelift/filetests/filetests/regalloc/unreachable_code.clif +++ /dev/null @@ -1,47 +0,0 @@ -; Use "test compile" here otherwise the dead blocks won't be eliminated. -test compile - -set enable_probestack=0 -target x86_64 legacy haswell - -; This function contains unreachable blocks which trip up the register -; allocator if they don't get cleared out. -function %unreachable_blocks(i64 vmctx) -> i32 baldrdash_system_v { -block0(v0: i64): - v1 = iconst.i32 0 - v2 = iconst.i32 0 - jump block2 - -block2: - jump block4 - -block4: - jump block2 - -; Everything below this point is unreachable. 
- -block3(v3: i32): - v5 = iadd.i32 v2, v3 - jump block6 - -block6: - jump block6 - -block7(v6: i32): - v7 = iadd.i32 v5, v6 - jump block8 - -block8: - jump block10 - -block10: - jump block8 - -block9(v8: i32): - v10 = iadd.i32 v7, v8 - jump block1(v10) - -block1(v11: i32): - return v11 -} - diff --git a/cranelift/filetests/filetests/regalloc/x86-regres.clif b/cranelift/filetests/filetests/regalloc/x86-regres.clif deleted file mode 100644 index 935b33c5b7..0000000000 --- a/cranelift/filetests/filetests/regalloc/x86-regres.clif +++ /dev/null @@ -1,49 +0,0 @@ -test regalloc -target i686 legacy - -; regex: V=v\d+ -; regex: BB=block\d+ - -; The value v9 appears both as the branch control and one of the block arguments -; in the brnz instruction in block2. It also happens that v7 and v9 are assigned -; to the same register, so v9 doesn't need to be moved before the brnz. -; -; This ended up confusong the constraint solver which had not made a record of -; the fixed register assignment for v9 since it was already in the correct -; register. -function %pr147(i32) -> i32 system_v { -block0(v0: i32): - v1 = iconst.i32 0 - v2 = iconst.i32 1 - v3 = iconst.i32 0 - jump block2(v3, v2, v0) - - ; check: $(splitEdge=$BB): - ; check: jump block2($V, $V, v9) - -block2(v4: i32, v5: i32, v7: i32): - ; check: block2 - v6 = iadd v4, v5 - v8 = iconst.i32 -1 - ; v7 is killed here and v9 gets the same register. - v9 = iadd v7, v8 - ; check: v9 = iadd v7, v8 - ; Here v9 the brnz control appears to interfere with v9 the block argument, - ; so divert_fixed_input_conflicts() calls add_var(v9), which is ok. The - ; add_var sanity checks got confused when no fixed assignment could be - ; found for v9. - ; - ; We should be able to handle this situation without making copies of v9. 
- brnz v9, block2(v5, v6, v9) - ; check: brnz v9, $splitEdge - jump block3 - -block3: - return v5 -} - -function %select_i64(i64, i64, i32) -> i64 { -block0(v0: i64, v1: i64, v2: i32): - v3 = select v2, v0, v1 - return v3 -} diff --git a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif b/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif deleted file mode 100644 index fd95cc2f4c..0000000000 --- a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif +++ /dev/null @@ -1,57 +0,0 @@ -test compile -target aarch64 -target x86_64 legacy - -; This checks that code shrink is allowed while relaxing code, when code shrink -; has not run. - -function u0:0(i64, i64) -> i64 system_v { - ss1 = explicit_slot 8 - sig0 = (i64) -> i64 system_v - fn0 = u0:8 sig0 - -block0(v0: i64, v1: i64): - v3 = stack_addr.i64 ss1 - v5 = call fn0(v1) - v6 = iconst.i64 0 - v8 = iconst.i64 0 - jump block3(v6, v1, v8) - -block3(v39: i64, v40: i64, v42: i64): - v9 = load.i64 v3 - v11 = icmp_imm ugt v9, 1 - v12 = bint.i8 v11 - v13 = uextend.i32 v12 - v14 = icmp_imm eq v13, 0 - brnz v14, block4 - jump block5 - -block4: - v18 = icmp_imm.i64 eq v40, 0 - v19 = bint.i8 v18 - v20 = uextend.i32 v19 - brz v20, block6 - jump block7 - -block7: - trap user0 - -block5: - v22 = iconst.i32 1 - v23 = ishl.i64 v39, v22 - v25 = iconst.i64 1 - v26 = band.i64 v42, v25 - v27 = bor v23, v26 - v28 = iconst.i32 1 - v29 = ushr.i64 v42, v28 - v30 = load.i64 v3 - v31 = iconst.i32 1 - v32 = ushr v30, v31 - store v32, v3 - jump block3(v27, v40, v29) - -block6: - v38 = iconst.i64 0 - return v38 -} - diff --git a/cranelift/filetests/filetests/safepoint/basic.clif b/cranelift/filetests/filetests/safepoint/basic.clif deleted file mode 100644 index 47acf2ad72..0000000000 --- a/cranelift/filetests/filetests/safepoint/basic.clif +++ /dev/null @@ -1,71 +0,0 @@ -test safepoint -set enable_safepoints=true -target x86_64 legacy - -function %test(i32, r64, r64) -> r64 { - block0(v0: i32, v1:r64, 
v2:r64): - jump block1(v0) - block1(v3: i32): - v4 = irsub_imm v3, 1 - jump block2(v4) - block2(v5: i32): - resumable_trap interrupt - brz v5, block1(v5) - jump block3 - block3: - v6 = null.r64 - v7 = is_null v6 - brnz v7, block2(v0) - jump block4 - block4: - brnz v0, block5 - jump block6 - block5: - return v1 - block6: - return v2 -} - -; sameln: function %test(i32 [%rdi], r64 [%rsi], r64 [%rdx]) -> r64 [%rax] fast { -; nextln: block0(v0: i32 [%rdi], v1: r64 [%rsi], v2: r64 [%rdx]): -; nextln: v10 = copy v0 -; nextln: jump block1(v10) -; nextln: -; nextln: block7: -; nextln: regmove.i32 v5, %rcx -> %rax -; nextln: jump block1(v5) -; nextln: -; nextln: block1(v3: i32 [%rax]): -; nextln: v8 = iconst.i32 1 -; nextln: v4 = isub v8, v3 -; nextln: jump block2(v4) -; nextln: -; nextln: block8: -; nextln: v9 = copy.i32 v0 -; nextln: regmove v9, %rax -> %rcx -; nextln: jump block2(v9) -; nextln: -; nextln: block2(v5: i32 [%rcx]): -; nextln: safepoint v1, v2 -; nextln: resumable_trap interrupt -; nextln: brz v5, block7 -; nextln: jump block3 -; nextln: -; nextln: block3: -; nextln: v6 = null.r64 -; nextln: v7 = is_null v6 -; nextln: brnz v7, block8 -; nextln: jump block4 -; nextln: -; nextln: block4: -; nextln: brnz.i32 v0, block5 -; nextln: jump block6 -; nextln: -; nextln: block5: -; nextln: regmove.r64 v1, %rsi -> %rax -; nextln: return v1 -; nextln: -; nextln: block6: -; nextln: regmove.r64 v2, %rdx -> %rax -; nextln: return v2 -; nextln: } diff --git a/cranelift/filetests/filetests/safepoint/call.clif b/cranelift/filetests/filetests/safepoint/call.clif deleted file mode 100644 index ffcf41fb46..0000000000 --- a/cranelift/filetests/filetests/safepoint/call.clif +++ /dev/null @@ -1,58 +0,0 @@ -test safepoint -set enable_safepoints=true -target x86_64 legacy - -function %direct() -> r64 { - fn0 = %none() - fn1 = %one() -> r64 - fn2 = %two() -> i32, r64 - -block0: - call fn0() - v1 = call fn1() - v2, v3 = call fn2() - brz v2, block2 - jump block1 -block1: - return v1 
-block2: - v4 = call fn1() - return v3 -} - -; sameln: function %direct() -> r64 [%rax] fast { -; nextln: ss0 = spill_slot 8 -; nextln: ss1 = spill_slot 8 -; nextln: sig0 = () fast -; nextln: sig1 = () -> r64 [%rax] fast -; nextln: sig2 = () -> i32 [%rax], r64 [%rdx] fast -; nextln: fn0 = %none sig0 -; nextln: fn1 = %one sig1 -; nextln: fn2 = %two sig2 -; nextln: -; nextln: block0: -; nextln: v5 = func_addr.i64 fn0 -; nextln: call_indirect sig0, v5() -; nextln: v6 = func_addr.i64 fn1 -; nextln: v9 = call_indirect sig1, v6() -; nextln: v1 = spill v9 -; nextln: v7 = func_addr.i64 fn2 -; nextln: safepoint v1 -; nextln: v2, v10 = call_indirect sig2, v7() -; nextln: v3 = spill v10 -; nextln: brz v2, block2 -; nextln: jump block1 -; nextln: -; nextln: block1: -; nextln: v11 = fill.r64 v1 -; nextln: regmove v11, %r15 -> %rax -; nextln: return v11 -; nextln: -; nextln: block2: -; nextln: v8 = func_addr.i64 fn1 -; nextln: safepoint v3 -; nextln: v4 = call_indirect sig1, v8() -; nextln: v12 = fill.r64 v3 -; nextln: regmove v12, %r15 -> %rax -; nextln: return v12 -; nextln: } diff --git a/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif deleted file mode 100644 index 5d10588da3..0000000000 --- a/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif +++ /dev/null @@ -1,18 +0,0 @@ -test simple_preopt -target aarch64 -target x86_64 - -;; The `isub` is a no-op, but we can't replace the whole `isub` instruction with -;; its `v2` operand's instruction because `v2` is one of many results. Instead, -;; we need to make an alias `v3 -> v2`. 
- -function %replace_inst_with_alias() -> i32 { -block0: - v0 = iconst.i32 0 - v1, v2 = x86_smulx v0, v0 - v3 = isub v2, v0 - ; check: v0 = iconst.i32 0 - ; nextln: v1, v2 = x86_smulx v0, v0 - ; nextln: v3 -> v2 - return v3 -} diff --git a/cranelift/filetests/filetests/stack_maps/call.clif b/cranelift/filetests/filetests/stack_maps/call.clif deleted file mode 100644 index 6563ad450a..0000000000 --- a/cranelift/filetests/filetests/stack_maps/call.clif +++ /dev/null @@ -1,103 +0,0 @@ -test stack_maps -set enable_safepoints=true -target x86_64 legacy - -function %icall_fast(r64) -> r64 fast { -; check: function %icall_fast -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = %none() -block0(v0: r64): -; check: ss0] v0 = spill v2 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %icall_sys_v(r64) -> r64 system_v { -; check: function %icall_sys_v -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = %none() -block0(v0: r64): -; check: ss0] v0 = spill v2 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %icall_fastcall(r64) -> r64 windows_fastcall { -; check: function %icall_fastcall -; nextln: ss0 = spill_slot 8, offset -32 -; nextln: ss1 = incoming_arg 24, offset -24 -; nextln: ss2 = explicit_slot 32, offset -64 - fn0 = %none() -block0(v0: r64): -; check: ss0] v0 = spill v2 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 8 -; nextln: - live: [4] - -function %call_fast(r64) -> r64 fast { -; check: function %call_fast -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = colocated %none() -block0(v0: r64): -; check: ss0] v0 = spill v1 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - 
mapped words: 4 -; nextln: - live: [0] - -function %call_sys_v(r64) -> r64 system_v { -; check: function %call_sys_v -; nextln: ss0 = spill_slot 8, offset -32 - fn0 = colocated %none() -block0(v0: r64): -; check: ss0] v0 = spill v1 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 4 -; nextln: - live: [0] - -function %call_fastcall(r64) -> r64 windows_fastcall { -; check: function %call_fastcall -; nextln: ss0 = spill_slot 8, offset -32 -; nextln: ss1 = incoming_arg 24, offset -24 -; nextln: ss2 = explicit_slot 32, offset -64 - fn0 = colocated %none() -block0(v0: r64): -; check: ss0] v0 = spill v1 -; check: safepoint v0 - call fn0() - return v0 -} -; check: Stack maps: -; nextln: -; nextln: safepoint v0 -; nextln: - mapped words: 8 -; nextln: - live: [4] diff --git a/cranelift/filetests/filetests/stack_maps/incoming_args.clif b/cranelift/filetests/filetests/stack_maps/incoming_args.clif deleted file mode 100644 index e8231c3aad..0000000000 --- a/cranelift/filetests/filetests/stack_maps/incoming_args.clif +++ /dev/null @@ -1,30 +0,0 @@ -test stack_maps -set enable_safepoints=true -target x86_64 legacy - -;; Incoming args get included in stack maps. 
- -function %incoming_args(r64, r64, r64, r64, r64) -> r64 windows_fastcall { -; check: r64 [32] -; nextln: ss0 = incoming_arg 8, offset 32 -; nextln: ss1 = incoming_arg 24, offset -24 -; nextln: ss2 = explicit_slot 32, offset -64 - - fn0 = %none() -; nextln: sig0 = () fast -; nextln: fn0 = %none sig0 - -block0(v0: r64, v1: r64, v2: r64, v3: r64, v4: r64): -; check: v4: r64 [ss0] - - call fn0() -; check: safepoint v4 -; nextln: call_indirect - return v4 -} - -; check: Stack maps: -; nextln: -; nextln: safepoint v4 -; nextln: - mapped words: 13 -; nextln: - live: [12] diff --git a/cranelift/filetests/filetests/verifier/flags.clif b/cranelift/filetests/filetests/verifier/flags.clif deleted file mode 100644 index 088523d24a..0000000000 --- a/cranelift/filetests/filetests/verifier/flags.clif +++ /dev/null @@ -1,77 +0,0 @@ -test verifier -target aarch64 -target i686 - -; Simple, correct use of CPU flags. -function %simple(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 - [Op1ret#c3] return v3 -} - -; Overlapping flag values of different types. -function %overlap(i32, f32) -> i32 { - block0(v0: i32, v1: f32): - [DynRexOp1rcmp#39] v2 = ifcmp v0, v0 - [Op2fcmp#42e] v3 = ffcmp v1, v1 - [Op2setf_abcd#490] v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3 - [Op2seti_abcd#490] v5 = trueif ugt v2 - [Op1rr#21] v6 = band v4, v5 - [Op2urm_noflags_abcd#4b6] v7 = bint.i32 v6 - [Op1ret#c3] return v7 -} - -; CPU flags clobbered by arithmetic. -function %clobbered(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [DynRexOp1rr#01] v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1 - [Op2seti_abcd#490] v3 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 - [Op1ret#c3] return v4 -} - -; CPU flags not clobbered by load. 
-function %live_across_load(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1ld#8b] v2 = load.i32 v0 - [Op2seti_abcd#490] v3 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3 - [Op1ret#c3] return v4 -} - -; Correct use of CPU flags across block. -function %live_across_block(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1jmpb#eb] jump block1 - block1: - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 - [Op1ret#c3] return v3 -} - -function %live_across_block_backwards(i32) -> i32 { - block0(v0: i32): - [Op1jmpb#eb] jump block2 - block1: - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2 - [Op1ret#c3] return v3 - block2: - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1jmpb#eb] jump block1 -} - -; Flags live into loop. -function %live_into_loop(i32) -> i32 { - block0(v0: i32): - [DynRexOp1rcmp#39] v1 = ifcmp v0, v0 - [Op1jmpb#eb] jump block1 - block1: - [Op2seti_abcd#490] v2 = trueif ugt v1 - [Op1jmpb#eb] jump block1 -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-b1.clif b/cranelift/filetests/filetests/wasm/multi-val-b1.clif deleted file mode 100644 index f41f867918..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-b1.clif +++ /dev/null @@ -1,68 +0,0 @@ -test compile -target x86_64 legacy haswell - -;; `b1` return values need to be legalized into bytes so that they can be stored -;; in memory. 
- -function %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 { -;; check: function %return_4_b1s(b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] fast { - -block0(v0: b1, v1: b1, v2: b1, v3: b1): -; check: block0(v0: b1 [%rsi], v1: b1 [%rdx], v2: b1 [%rcx], v3: b1 [%r8], v4: i64 [%rdi], v13: i64 [%rbp]): - - return v0, v1, v2, v3 - ; check: v5 = bint.i8 v0 - ; nextln: v9 = uextend.i32 v5 - ; nextln: istore8 notrap aligned v9, v4 - ; nextln: v6 = bint.i8 v1 - ; nextln: v10 = uextend.i32 v6 - ; nextln: istore8 notrap aligned v10, v4+1 - ; nextln: v7 = bint.i8 v2 - ; nextln: v11 = uextend.i32 v7 - ; nextln: istore8 notrap aligned v11, v4+2 - ; nextln: v8 = bint.i8 v3 - ; nextln: v12 = uextend.i32 v8 - ; nextln: istore8 notrap aligned v12, v4+3 -} - -function %call_4_b1s() { -; check: function %call_4_b1s(i64 fp [%rbp], i64 csr [%rbx]) -> i64 fp [%rbp], i64 csr [%rbx] fast { -; nextln: ss0 = sret_slot 4, offset -28 - - fn0 = colocated %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 - ; check: sig0 = (b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi]) -> i64 sret [%rax] fast - -block0: -; check: block0(v26: i64 [%rbp], v27: i64 [%rbx]): - - v0 = bconst.b1 true - v1 = bconst.b1 false - v2 = bconst.b1 true - v3 = bconst.b1 false - - ; check: v8 = stack_addr.i64 ss0 - v4, v5, v6, v7 = call fn0(v0, v1, v2, v3) - ; check: v9 = call fn0(v0, v1, v2, v3, v8) - ; nextln: v22 = uload8.i32 notrap aligned v9 - ; nextln: v10 = ireduce.i8 v22 - ; nextln: v11 = raw_bitcast.b8 v10 - ; nextln: v12 = breduce.b1 v11 - ; nextln: v4 -> v12 - ; nextln: v23 = uload8.i32 notrap aligned v9+1 - ; nextln: v13 = ireduce.i8 v23 - ; nextln: v14 = raw_bitcast.b8 v13 - ; nextln: v15 = breduce.b1 v14 - ; nextln: v5 -> v15 - ; nextln: v24 = uload8.i32 notrap aligned v9+2 - ; nextln: v16 = ireduce.i8 v24 - ; nextln: v17 = raw_bitcast.b8 v16 - ; nextln: v18 = breduce.b1 v17 - ; nextln: v6 -> v18 - ; nextln: v25 = uload8.i32 notrap aligned 
v9+3 - ; nextln: v19 = ireduce.i8 v25 - ; nextln: v20 = raw_bitcast.b8 v19 - ; nextln: v21 = breduce.b1 v20 - ; nextln: v7 -> v21 - - return -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif deleted file mode 100644 index 06d0814dfb..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif +++ /dev/null @@ -1,26 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Indirect calls with many returns. - -function %call_indirect_many_rets(i64) { - ; check: ss0 = sret_slot 32 - - sig0 = () -> i64, i64, i64, i64 - ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - -block0(v0: i64): - v1, v2, v3, v4 = call_indirect sig0, v0() - ; check: v5 = stack_addr.i64 ss0 - ; nextln: v6 = call_indirect sig0, v0(v5) - ; nextln: v7 = load.i64 notrap aligned v6 - ; nextln: v1 -> v7 - ; nextln: v8 = load.i64 notrap aligned v6+8 - ; nextln: v2 -> v8 - ; nextln: v9 = load.i64 notrap aligned v6+16 - ; nextln: v3 -> v9 - ; nextln: v10 = load.i64 notrap aligned v6+24 - ; nextln: v4 -> v10 - - return -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif deleted file mode 100644 index aae733ddf4..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif +++ /dev/null @@ -1,24 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Test if arguments are legalized if function uses sret - -function %call_indirect_with_split_arg(i64, i64, i64) { - ; check: ss0 = sret_slot 32 - sig0 = (i128) -> i64, i64, i64, i64 - ; check: sig0 = (i64 [%rsi], i64 [%rdx], i64 sret [%rdi]) -> i64 sret [%rax] fast -block0(v0: i64, v1: i64, v2: i64): - v3 = iconcat v1, v2 - v4, v5, v6, v7 = call_indirect sig0, v0(v3) - ; check: v8 = stack_addr.i64 ss0 - ; check: v9 = call_indirect sig0, v0(v1, v2, v8) - ; check: v10 = load.i64 notrap aligned v9 - ; check: 
v4 -> v10 - ; check: v11 = load.i64 notrap aligned v9+8 - ; check: v5 -> v11 - ; check: v12 = load.i64 notrap aligned v9+16 - ; check: v6 -> v12 - ; check: v13 = load.i64 notrap aligned v9+24 - ; check: v7 -> v13 - return -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif deleted file mode 100644 index c58102aedc..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif +++ /dev/null @@ -1,61 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Test that we don't reuse `sret` stack slots for multiple calls. We could do -;; this one day, but it would require some care to ensure that we don't have -;; subsequent calls overwrite the results of previous calls. - -function %foo() -> i32, f32 { - ; check: ss0 = sret_slot 20 - ; nextln: ss1 = sret_slot 20 - - fn0 = %f() -> i32, i32, i32, i32, i32 - fn1 = %g() -> f32, f32, f32, f32, f32 - ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - ; nextln: sig1 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - ; nextln: fn0 = %f sig0 - ; nextln: fn1 = %g sig1 - -block0: - v0, v1, v2, v3, v4 = call fn0() - ; check: v18 = stack_addr.i64 ss0 - ; nextln: v25 = func_addr.i64 fn0 - ; nextln: v19 = call_indirect sig0, v25(v18) - ; nextln: v20 = load.i32 notrap aligned v19 - ; nextln: v0 -> v20 - ; nextln: v21 = load.i32 notrap aligned v19+4 - ; nextln: v1 -> v21 - ; nextln: v22 = load.i32 notrap aligned v19+8 - ; nextln: v2 -> v22 - ; nextln: v23 = load.i32 notrap aligned v19+12 - ; nextln: v3 -> v23 - ; nextln: v24 = load.i32 notrap aligned v19+16 - ; nextln: v4 -> v24 - - v5, v6, v7, v8, v9 = call fn1() - ; check: v26 = stack_addr.i64 ss1 - ; nextln: v33 = func_addr.i64 fn1 - ; nextln: v27 = call_indirect sig1, v33(v26) - ; nextln: v28 = load.f32 notrap aligned v27 - ; nextln: v5 -> v28 - ; nextln: v29 = load.f32 notrap aligned v27+4 - ; nextln: v6 -> v29 - ; nextln: v30 = 
load.f32 notrap aligned v27+8 - ; nextln: v7 -> v30 - ; nextln: v31 = load.f32 notrap aligned v27+12 - ; nextln: v8 -> v31 - ; nextln: v32 = load.f32 notrap aligned v27+16 - ; nextln: v9 -> v32 - - v10 = iadd v0, v1 - v11 = iadd v2, v3 - v12 = iadd v10, v11 - v13 = iadd v12, v4 - - v14 = fadd v5, v6 - v15 = fadd v7, v8 - v16 = fadd v14, v15 - v17 = fadd v16, v9 - - return v13, v17 -} diff --git a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif deleted file mode 100644 index da9f25ed97..0000000000 --- a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif +++ /dev/null @@ -1,51 +0,0 @@ -test legalizer -target x86_64 legacy haswell - -;; Need to insert padding after the `i8`s so that the `i32` and `i64` are -;; aligned. - -function %returner() -> i8, i32, i8, i64 { -; check: function %returner(i64 sret [%rdi]) -> i64 sret [%rax] fast { - -block0: -; check: block0(v4: i64): - - v0 = iconst.i8 0 - v1 = iconst.i32 1 - v2 = iconst.i8 2 - v3 = iconst.i64 3 - return v0, v1, v2, v3 - ; check: v6 = uextend.i32 v0 - ; nextln: istore8 notrap aligned v6, v4 - ; nextln: store notrap aligned v1, v4+4 - ; nextln: v7 = uextend.i32 v2 - ; nextln: istore8 notrap aligned v7, v4+8 - ; nextln: store notrap aligned v3, v4+16 - ; nextln: return v4 -} - -function %caller() { - ; check: ss0 = sret_slot 24 - - fn0 = %returner() -> i8, i32, i8, i64 - ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast - ; nextln: fn0 = %returner sig0 - -block0: - v0, v1, v2, v3 = call fn0() - ; check: v4 = stack_addr.i64 ss0 - ; nextln: v10 = func_addr.i64 fn0 - ; nextln: v5 = call_indirect sig0, v10(v4) - ; nextln: v11 = uload8.i32 notrap aligned v5 - ; nextln: v6 = ireduce.i8 v11 - ; nextln: v0 -> v6 - ; nextln: v7 = load.i32 notrap aligned v5+4 - ; nextln: v1 -> v7 - ; nextln: v12 = uload8.i32 notrap aligned v5+8 - ; nextln: v8 = ireduce.i8 v12 - ; nextln: v2 -> v8 - ; nextln: v9 = load.i64 
notrap aligned v5+16 - ; nextln: v3 -> v9 - - return -} diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs index d764b916e5..6a7fb5a282 100644 --- a/cranelift/filetests/src/function_runner.rs +++ b/cranelift/filetests/src/function_runner.rs @@ -47,8 +47,8 @@ impl SingleFunctionCompiler { } /// Build a [SingleFunctionCompiler] using the host machine's ISA and the passed flags. - pub fn with_host_isa(flags: settings::Flags, variant: BackendVariant) -> Self { - let builder = builder_with_options(variant, true) + pub fn with_host_isa(flags: settings::Flags) -> Self { + let builder = builder_with_options(true) .expect("Unable to build a TargetIsa for the current host"); let isa = builder.finish(flags); Self::new(isa) @@ -58,7 +58,7 @@ impl SingleFunctionCompiler { /// ISA. pub fn with_default_host_isa() -> Self { let flags = settings::Flags::new(settings::builder()); - Self::with_host_isa(flags, BackendVariant::Any) + Self::with_host_isa(flags) } /// Compile the passed [Function] to a `CompiledFunction`. This function will: diff --git a/cranelift/filetests/src/test_run.rs b/cranelift/filetests/src/test_run.rs index 4b9e528cfd..86b346e21b 100644 --- a/cranelift/filetests/src/test_run.rs +++ b/cranelift/filetests/src/test_run.rs @@ -48,11 +48,10 @@ impl SubTest for TestRun { ); return Ok(()); } - let variant = context.isa.unwrap().variant(); let test_env = RuntestEnvironment::parse(&context.details.comments[..])?; - let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone(), variant); + let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone()); for comment in context.details.comments.iter() { if let Some(command) = parse_run_command(comment.text, &func.signature)? 
{ trace!("Parsed run command: {}", command); diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index a82be29ace..93c6bf0a3f 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -1033,44 +1033,6 @@ where } Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?), - // TODO: these instructions should be removed once the new backend makes these obsolete - // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the - // "all-arch" feature for cranelift-codegen would become unnecessary for this crate. - Opcode::X86Udivmodx - | Opcode::X86Sdivmodx - | Opcode::X86Umulx - | Opcode::X86Smulx - | Opcode::X86Cvtt2si - | Opcode::X86Vcvtudq2ps - | Opcode::X86Fmin - | Opcode::X86Fmax - | Opcode::X86Push - | Opcode::X86Pop - | Opcode::X86Bsr - | Opcode::X86Bsf - | Opcode::X86Pshufd - | Opcode::X86Pshufb - | Opcode::X86Pblendw - | Opcode::X86Pextr - | Opcode::X86Pinsr - | Opcode::X86Insertps - | Opcode::X86Punpckh - | Opcode::X86Punpckl - | Opcode::X86Movsd - | Opcode::X86Movlhps - | Opcode::X86Psll - | Opcode::X86Psrl - | Opcode::X86Psra - | Opcode::X86Pmullq - | Opcode::X86Pmuludq - | Opcode::X86Ptest - | Opcode::X86Pmaxs - | Opcode::X86Pmaxu - | Opcode::X86Pmins - | Opcode::X86Pminu - | Opcode::X86Palignr - | Opcode::X86ElfTlsGetAddr - | Opcode::X86MachoTlsGetAddr => unimplemented!("x86 instruction: {}", inst.opcode()), Opcode::JumpTableBase | Opcode::JumpTableEntry | Opcode::IndirectJumpTableBr => { unimplemented!("Legacy instruction: {}", inst.opcode()) } diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs index b2364c6ad9..c2a5aa78b8 100644 --- a/cranelift/native/src/lib.rs +++ b/cranelift/native/src/lib.rs @@ -30,7 +30,7 @@ use target_lexicon::Triple; /// machine, or `Err(())` if the host machine is not supported /// in the current configuration. 
pub fn builder() -> Result { - builder_with_options(isa::BackendVariant::Any, true) + builder_with_options(true) } /// Return an `isa` builder configured for the current host @@ -40,17 +40,11 @@ pub fn builder() -> Result { /// Selects the given backend variant specifically; this is /// useful when more than oen backend exists for a given target /// (e.g., on x86-64). -pub fn builder_with_options( - variant: isa::BackendVariant, - infer_native_flags: bool, -) -> Result { - let mut isa_builder = - isa::lookup_variant(Triple::host(), variant).map_err(|err| match err { - isa::LookupError::SupportDisabled => { - "support for architecture disabled at compile time" - } - isa::LookupError::Unsupported => "unsupported architecture", - })?; +pub fn builder_with_options(infer_native_flags: bool) -> Result { + let mut isa_builder = isa::lookup_variant(Triple::host()).map_err(|err| match err { + isa::LookupError::SupportDisabled => "support for architecture disabled at compile time", + isa::LookupError::Unsupported => "unsupported architecture", + })?; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index a8f20230fc..98c46ab2e3 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -22,7 +22,7 @@ use cranelift_codegen::ir::{ HeapStyle, JumpTable, JumpTableData, MemFlags, Opcode, SigRef, Signature, StackSlot, StackSlotData, StackSlotKind, Table, TableData, Type, Value, ValueLoc, }; -use cranelift_codegen::isa::{self, BackendVariant, CallConv, Encoding, RegUnit, TargetIsa}; +use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa}; use cranelift_codegen::packed_option::ReservedValue; use cranelift_codegen::{settings, settings::Configurable, timing}; use smallvec::SmallVec; @@ -1159,19 +1159,7 @@ impl<'a> Parser<'a> { Ok(triple) => triple, Err(err) => return err!(loc, err), }; - // Look for `machinst` or `legacy` option before instantiating 
IsaBuilder. - let variant = match words.peek() { - Some(&"machinst") => { - words.next(); - BackendVariant::MachInst - } - Some(&"legacy") => { - words.next(); - BackendVariant::Legacy - } - _ => BackendVariant::Any, - }; - let mut isa_builder = match isa::lookup_variant(triple, variant) { + let mut isa_builder = match isa::lookup_variant(triple) { Err(isa::LookupError::SupportDisabled) => { continue; } diff --git a/cranelift/tests/bugpoint_test.clif b/cranelift/tests/bugpoint_test.clif index b2e9acc37e..ced5b9e809 100644 --- a/cranelift/tests/bugpoint_test.clif +++ b/cranelift/tests/bugpoint_test.clif @@ -300,7 +300,8 @@ block0(v0: i64, v1: i64, v2: i64): v241 -> v1 v256 -> v1 v262 -> v1 - v3, v4 = x86_sdivmodx v0, v1, v2 + v3 = imul v0, v1 + v4 = imul v1, v2 store aligned v4, v3 v5 = load.i64 aligned v2+8 store aligned v5, v3+8 diff --git a/crates/bench-api/Cargo.toml b/crates/bench-api/Cargo.toml index a67b48c501..2edf20800c 100644 --- a/crates/bench-api/Cargo.toml +++ b/crates/bench-api/Cargo.toml @@ -31,4 +31,3 @@ wat = "1.0" default = ["shuffling-allocator"] wasi-crypto = ["wasmtime-wasi-crypto"] wasi-nn = ["wasmtime-wasi-nn"] -old-x86-backend = ["wasmtime/old-x86-backend"] diff --git a/crates/cranelift/Cargo.toml b/crates/cranelift/Cargo.toml index 49a9069dc1..331f660f79 100644 --- a/crates/cranelift/Cargo.toml +++ b/crates/cranelift/Cargo.toml @@ -27,4 +27,3 @@ thiserror = "1.0.4" [features] all-arch = ["cranelift-codegen/all-arch"] -old-x86-backend = ["cranelift-codegen/old-x86-backend"] diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml index badd61a737..b38205bfeb 100644 --- a/crates/wasmtime/Cargo.toml +++ b/crates/wasmtime/Cargo.toml @@ -71,9 +71,6 @@ parallel-compilation = ["rayon"] # Enables support for automatic cache configuration to be enabled in `Config`. cache = ["wasmtime-cache"] -# Use Cranelift's old x86 backend. 
-old-x86-backend = ["wasmtime-cranelift/old-x86-backend"] - # Enables support for "async stores" as well as defining host functions as # `async fn` and calling functions asynchronously. async = ["wasmtime-fiber", "wasmtime-runtime/async"] diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index 5615d438c2..dd8877e8f9 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -1129,9 +1129,7 @@ impl Func { /// and similarly if a function has multiple results you can bind that too /// /// ``` - /// # #[cfg(not(feature = "old-x86-backend"))] /// # use wasmtime::*; - /// # #[cfg(not(feature = "old-x86-backend"))] /// # fn foo(add_with_overflow: &Func, mut store: Store<()>) -> anyhow::Result<()> { /// let typed = add_with_overflow.typed::<(u32, u32), (u32, i32), _>(&store)?; /// let (result, overflow) = typed.call(&mut store, (u32::max_value(), 2))?; @@ -1564,12 +1562,10 @@ macro_rules! impl_host_abi { #[doc(hidden)] #[allow(non_snake_case)] #[repr(C)] - #[cfg(not(feature = "old-x86-backend"))] pub struct []<$($u,)*> { $($u: $u,)* } - #[cfg(not(feature = "old-x86-backend"))] #[allow(non_snake_case, unused_assignments)] impl<$t: Copy, $($u: Copy,)*> HostAbi for ($t, $($u,)*) { type Abi = $t; diff --git a/crates/wasmtime/src/module/registry.rs b/crates/wasmtime/src/module/registry.rs index 89f851c488..2c5e05df89 100644 --- a/crates/wasmtime/src/module/registry.rs +++ b/crates/wasmtime/src/module/registry.rs @@ -134,54 +134,7 @@ impl ModuleInfo for RegisteredModule { // Because we know we are in Wasm code, and we must be at some kind // of call/safepoint, then the Cranelift backend must have avoided // emitting a stack map for this location because no refs were live. - #[cfg(not(feature = "old-x86-backend"))] Err(_) => return None, - - // ### Old x86_64 backend specific code. 
- // - // Because GC safepoints are technically only associated with a - // single PC, we should ideally only care about `Ok(index)` values - // returned from the binary search. However, safepoints are inserted - // right before calls, and there are two things that can disturb the - // PC/offset associated with the safepoint versus the PC we actually - // use to query for the stack map: - // - // 1. The `backtrace` crate gives us the PC in a frame that will be - // *returned to*, and where execution will continue from, rather than - // the PC of the call we are currently at. So we would need to - // disassemble one instruction backwards to query the actual PC for - // the stack map. - // - // TODO: One thing we *could* do to make this a little less error - // prone, would be to assert/check that the nearest GC safepoint - // found is within `max_encoded_size(any kind of call instruction)` - // our queried PC for the target architecture. - // - // 2. Cranelift's stack maps only handle the stack, not - // registers. However, some references that are arguments to a call - // may need to be in registers. In these cases, what Cranelift will - // do is: - // - // a. spill all the live references, - // b. insert a GC safepoint for those references, - // c. reload the references into registers, and finally - // d. make the call. - // - // Step (c) adds drift between the GC safepoint and the location of - // the call, which is where we actually walk the stack frame and - // collect its live references. - // - // Luckily, the spill stack slots for the live references are still - // up to date, so we can still find all the on-stack roots. - // Furthermore, we do not have a moving GC, so we don't need to worry - // whether the following code will reuse the references in registers - // (which would not have been updated to point to the moved objects) - // or reload from the stack slots (which would have been updated to - // point to the moved objects). 
- #[cfg(feature = "old-x86-backend")] - Err(0) => return None, - #[cfg(feature = "old-x86-backend")] - Err(i) => i - 1, }; Some(&info.stack_maps[index].stack_map) diff --git a/examples/multi.rs b/examples/multi.rs index df36671ceb..b243d83cd7 100644 --- a/examples/multi.rs +++ b/examples/multi.rs @@ -9,7 +9,6 @@ use anyhow::Result; -#[cfg(not(feature = "old-x86-backend"))] fn main() -> Result<()> { use wasmtime::*; @@ -63,11 +62,3 @@ fn main() -> Result<()> { Ok(()) } - -// Note that this example is not supported in the off-by-default feature of the -// old x86 compiler backend for Cranelift. Wasmtime's default configuration -// supports this example, however. -#[cfg(feature = "old-x86-backend")] -fn main() -> Result<()> { - Ok(()) -} diff --git a/tests/all/debug/lldb.rs b/tests/all/debug/lldb.rs index 3e72eaf49f..dada5deee8 100644 --- a/tests/all/debug/lldb.rs +++ b/tests/all/debug/lldb.rs @@ -133,44 +133,6 @@ check: exited with status Ok(()) } -#[test] -#[ignore] -#[cfg(all( - any(target_os = "linux", target_os = "macos"), - target_pointer_width = "64", - // Ignore test on new backend. The value this is looking for is - // not available at the point that the breakpoint is set when - // compiled by the new backend. - feature = "old-x86-backend", -))] -pub fn test_debug_dwarf_ptr() -> Result<()> { - let output = lldb_with_script( - &[ - "-g", - "--opt-level", - "0", - "tests/all/debug/testsuite/reverse-str.wasm", - ], - r#"b reverse-str.c:9 -r -p __vmctx->set(),&*s -c"#, - )?; - - check_lldb_output( - &output, - r#" -check: Breakpoint 1: no locations (pending) -check: stop reason = breakpoint 1.1 -check: frame #0 -sameln: reverse(s=(__ptr = -check: "Hello, world." 
-check: resuming -"#, - )?; - Ok(()) -} - #[test] #[ignore] #[cfg(all( diff --git a/tests/all/debug/translate.rs b/tests/all/debug/translate.rs index 2560a71b03..aa1b79343b 100644 --- a/tests/all/debug/translate.rs +++ b/tests/all/debug/translate.rs @@ -109,26 +109,3 @@ check: DW_AT_decl_line (10) ) } -#[test] -#[ignore] -#[cfg(all( - any(target_os = "linux", target_os = "macos"), - target_arch = "x86_64", - target_pointer_width = "64", - // Ignore test on new backend. This is a specific test with hardcoded - // offsets and the new backend compiles the return basic-block at a different - // offset, causing mismatches. - feature = "old-x86-backend", -))] -fn test_debug_dwarf5_translate_lines() -> Result<()> { - check_line_program( - "tests/all/debug/testsuite/fib-wasm-dwarf5.wasm", - r##" -check: Address Line Column File ISA Discriminator Flags -check: 0x000000000000013c 15 3 1 0 0 -# The important point is that the following offset must be _after_ the `ret` instruction. -# FIXME: this +1 increment might vary on other archs. 
-nextln: 0x000000000000013d 15 3 1 0 0 end_sequence - "##, - ) -} diff --git a/tests/all/func.rs b/tests/all/func.rs index da4c630fa4..eef7a9efd6 100644 --- a/tests/all/func.rs +++ b/tests/all/func.rs @@ -525,7 +525,6 @@ fn pass_cross_store_arg() -> anyhow::Result<()> { } #[test] -#[cfg_attr(feature = "old-x86-backend", ignore)] fn externref_signature_no_reference_types() -> anyhow::Result<()> { let mut config = Config::new(); config.wasm_reference_types(false); @@ -569,7 +568,6 @@ fn trampolines_always_valid() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] fn typed_multiple_results() -> anyhow::Result<()> { let mut store = Store::<()>::default(); let module = Module::new( @@ -647,7 +645,6 @@ fn trap_doesnt_leak() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] fn wrap_multiple_results() -> anyhow::Result<()> { fn test(store: &mut Store<()>, t: T) -> anyhow::Result<()> where diff --git a/tests/all/gc.rs b/tests/all/gc.rs index 73ffe03ece..27c9341ea0 100644 --- a/tests/all/gc.rs +++ b/tests/all/gc.rs @@ -189,7 +189,6 @@ fn many_live_refs() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] // uses atomic instrs not implemented here fn drop_externref_via_table_set() -> anyhow::Result<()> { let (mut store, module) = ref_types_module( r#" @@ -285,7 +284,6 @@ fn global_drops_externref() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] // uses atomic instrs not implemented here fn table_drops_externref() -> anyhow::Result<()> { test_engine(&Engine::default())?; @@ -336,7 +334,6 @@ fn table_drops_externref() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = "old-x86-backend"))] // uses atomic instrs not implemented here fn gee_i_sure_hope_refcounting_is_atomic() -> anyhow::Result<()> { let mut config = Config::new(); config.wasm_reference_types(true); @@ -426,7 +423,6 @@ fn global_init_no_leak() -> anyhow::Result<()> { } #[test] -#[cfg(not(feature = 
"old-x86-backend"))] fn no_gc_middle_of_args() -> anyhow::Result<()> { let (mut store, module) = ref_types_module( r#" diff --git a/tests/all/relocs.rs b/tests/all/relocs.rs index 6dab73cd74..fdd0730dfa 100644 --- a/tests/all/relocs.rs +++ b/tests/all/relocs.rs @@ -8,8 +8,6 @@ //! 32-bits, and right now object files aren't supported larger than 4gb anyway //! so we would need a lot of other support necessary to exercise that. -#![cfg(not(feature = "old-x86-backend"))] // multi-value not supported here - use anyhow::Result; use wasmtime::*; diff --git a/tests/all/wast.rs b/tests/all/wast.rs index 675850df36..f700842193 100644 --- a/tests/all/wast.rs +++ b/tests/all/wast.rs @@ -25,12 +25,6 @@ fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()> // by reference types. let reftypes = simd || feature_found(wast, "reference-types"); - // Threads & simd aren't implemented in the old backend, so skip those - // tests. - if (threads || simd) && cfg!(feature = "old-x86-backend") { - return Ok(()); - } - let mut cfg = Config::new(); cfg.wasm_simd(simd) .wasm_bulk_memory(bulk_mem)