#![allow(non_snake_case)]

use std::collections::HashMap;

use crate::cdsl::encodings::{Encoding, EncodingBuilder};
use crate::cdsl::instructions::{
    BoundInstruction, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
    InstructionPredicateNode, InstructionPredicateRegistry,
};
use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};

use crate::shared::types::Bool::B1;
use crate::shared::types::Float::{F32, F64};
use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::Definitions as SharedDefinitions;

use super::recipes::{RecipeGroup, Template};

/// Accumulator for x86 encodings, split by CPU mode (32-bit vs. 64-bit),
/// together with the recipe and instruction-predicate registries that the
/// encodings refer to.
pub struct PerCpuModeEncodings {
    /// Encodings valid in 32-bit (X86_32) mode.
    pub enc32: Vec<Encoding>,
    /// Encodings valid in 64-bit (X86_64) mode.
    pub enc64: Vec<Encoding>,
    /// All recipes referenced by the encodings above.
    pub recipes: Recipes,
    // Reverse map used to deduplicate recipes: an identical recipe that was
    // already pushed into `recipes` is reused rather than inserted twice.
    recipes_inverse: HashMap<EncodingRecipe, EncodingRecipeNumber>,
    /// Instruction predicates referenced by the encodings.
    pub inst_pred_reg: InstructionPredicateRegistry,
}

impl PerCpuModeEncodings {
    /// Create an empty set of per-CPU-mode encodings.
    fn new() -> Self {
        Self {
            enc32: Vec::new(),
            enc64: Vec::new(),
            recipes: Recipes::new(),
            recipes_inverse: HashMap::new(),
            inst_pred_reg: InstructionPredicateRegistry::new(),
        }
    }

    /// Insert `recipe` and return its number, reusing the number of a
    /// previously inserted identical recipe when possible.
    fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber {
        if let Some(found_index) = self.recipes_inverse.get(&recipe) {
            // Two recipes that compare equal must also agree on the name;
            // anything else indicates a recipe-definition bug. Passing the
            // format args directly avoids building the message (and
            // allocating) when the assertion holds.
            assert!(
                self.recipes[*found_index].name == recipe.name,
                "trying to insert different recipes with a same name ({})",
                recipe.name
            );
            *found_index
        } else {
            let index = self.recipes.push(recipe.clone());
            self.recipes_inverse.insert(recipe, index);
            index
        }
    }

    /// Build a single `Encoding` from a template, letting `builder_closure`
    /// attach any ISA or instruction predicates before the final build.
    fn make_encoding<T>(
        &mut self,
        inst: InstSpec,
        template: Template,
        builder_closure: T,
    ) -> Encoding
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let (recipe, bits) = template.build();
        let recipe_number = self.add_recipe(recipe);
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
    }

    /// Add an X86_32 encoding, customized by `builder_closure`.
    fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc32.push(encoding);
    }
    /// Add a plain X86_32 encoding.
    fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) {
        self.enc32_func(inst, template, |x| x);
    }
    /// Add an X86_32 encoding guarded by the ISA predicate `isap`.
    fn enc32_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap));
    }
    /// Add an X86_32 encoding guarded by the instruction predicate `instp`.
    fn enc32_instp(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp));
    }
    /// Add an X86_32 encoding from a bare recipe (no template).
    fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc32.push(encoding);
    }

    /// Add an X86_64 encoding, customized by `builder_closure`.
    fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc64.push(encoding);
    }
    /// Add a plain X86_64 encoding.
    fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        self.enc64_func(inst, template, |x| x);
    }
    /// Add an X86_64 encoding guarded by the ISA predicate `isap`.
    fn enc64_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap));
    }
    /// Add an X86_64 encoding guarded by the instruction predicate `instp`.
    fn enc64_instp(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp));
    }
    /// Add an X86_64 encoding from a bare recipe (no template).
    fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc64.push(encoding);
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(I32), template.nonrex());

        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
        // reg-alloc would never use r8 and up.
        self.enc64(inst.bind(I32), template.rex());
        self.enc64(inst.bind(I32), template.nonrex());
        self.enc64(inst.bind(I64), template.rex().w());
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    fn enc_i32_i64_instp(
        &mut self,
        inst: &Instruction,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_func(inst.bind(I32), template.nonrex(), |builder| {
            builder.inst_predicate(instp.clone())
        });

        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
        // reg-alloc would never use r8 and up.
        self.enc64_func(inst.bind(I32), template.rex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst.bind(I32), template.nonrex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst.bind(I64), template.rex().w(), |builder| {
            builder.inst_predicate(instp)
        });
    }

    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
    fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) {
        // See above comment about the ordering of rex vs non-rex encodings.
        self.enc64(inst.clone(), template.rex());
        self.enc64(inst, template);
    }

    /// Add encodings for `inst` to X86_64 with and without a REX prefix,
    /// guarded by the instruction predicate `instp`.
    fn enc_x86_64_instp(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        // See above comment about the ordering of rex vs non-rex encodings.
        self.enc64_func(inst.clone(), template.rex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst, template, |builder| builder.inst_predicate(instp));
    }
    /// Add encodings for `inst` to X86_64 with and without a REX prefix,
    /// guarded by the ISA predicate `isap`.
    fn enc_x86_64_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        // See above comment about the ordering of rex vs non-rex encodings.
        self.enc64_isap(inst.clone(), template.rex(), isap);
        self.enc64_isap(inst, template, isap);
    }

    /// Add all three encodings for `inst`:
    /// - X86_32
    /// - X86_64 with and without the REX prefix.
    fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
        self.enc32(inst.clone(), template.clone());
        self.enc_x86_64(inst, template);
    }
    /// Like `enc_both`, guarded by the ISA predicate `isap`.
    fn enc_both_isap(
        &mut self,
        inst: BoundInstruction,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc32_isap(inst.clone(), template.clone(), isap);
        self.enc_x86_64_isap(inst, template, isap);
    }
    /// Like `enc_both`, guarded by the instruction predicate `instp`.
    fn enc_both_instp(
        &mut self,
        inst: BoundInstruction,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_instp(inst.clone(), template.clone(), instp.clone());
        self.enc_x86_64_instp(inst, template, instp);
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
    /// argument to determine whether or not to set the REX.W bit.
    fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
        // 32-bit mode: a single encoding for the i32 form.
        self.enc32(inst.clone().bind(I32).bind_any(), template.clone());

        // REX-less encoding must come after REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
        self.enc64(inst.clone().bind(I32).bind_any(), template.clone().rex());
        self.enc64(inst.clone().bind(I32).bind_any(), template.clone());

        // For the i64 form: either a single REX.W encoding, or (when the W
        // bit is not needed) REX and REX-less encodings, in that order.
        if w_bit {
            self.enc64(inst.clone().bind(I64).bind_any(), template.rex().w());
        } else {
            self.enc64(inst.clone().bind(I64).bind_any(), template.clone().rex());
            self.enc64(inst.clone().bind(I64).bind_any(), template);
        }
    }
}

// Definitions.

pub fn define(
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) -> PerCpuModeEncodings {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.format_registry;

    // Shorthands for instructions.
    let adjust_sp_down = shared.by_name("adjust_sp_down");
    let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm");
    let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm");
    let band = shared.by_name("band");
    let band_imm = shared.by_name("band_imm");
    let band_not = shared.by_name("band_not");
    let bconst = shared.by_name("bconst");
    let bint = shared.by_name("bint");
    let bitcast = shared.by_name("bitcast");
    let bnot = shared.by_name("bnot");
    let bor = shared.by_name("bor");
    let bor_imm = shared.by_name("bor_imm");
    let brff = shared.by_name("brff");
    let brif = shared.by_name("brif");
    let brnz = shared.by_name("brnz");
    let brz = shared.by_name("brz");
    let bxor = shared.by_name("bxor");
    let bxor_imm = shared.by_name("bxor_imm");
    let call = shared.by_name("call");
    let call_indirect = shared.by_name("call_indirect");
    let ceil = shared.by_name("ceil");
    let clz = shared.by_name("clz");
    let copy = shared.by_name("copy");
    let copy_nop = shared.by_name("copy_nop");
    let
copy_special = shared.by_name("copy_special"); + let ctz = shared.by_name("ctz"); + let debugtrap = shared.by_name("debugtrap"); + let f32const = shared.by_name("f32const"); + let f64const = shared.by_name("f64const"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fcvt_from_sint = shared.by_name("fcvt_from_sint"); + let fdemote = shared.by_name("fdemote"); + let fdiv = shared.by_name("fdiv"); + let ffcmp = shared.by_name("ffcmp"); + let fill = shared.by_name("fill"); + let floor = shared.by_name("floor"); + let fmul = shared.by_name("fmul"); + let fpromote = shared.by_name("fpromote"); + let fsub = shared.by_name("fsub"); + let func_addr = shared.by_name("func_addr"); + let iadd = shared.by_name("iadd"); + let iadd_imm = shared.by_name("iadd_imm"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let iconst = shared.by_name("iconst"); + let ifcmp = shared.by_name("ifcmp"); + let ifcmp_imm = shared.by_name("ifcmp_imm"); + let ifcmp_sp = shared.by_name("ifcmp_sp"); + let imul = shared.by_name("imul"); + let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); + let ireduce = shared.by_name("ireduce"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let istore16 = shared.by_name("istore16"); + let istore16_complex = shared.by_name("istore16_complex"); + let istore32 = shared.by_name("istore32"); + let istore32_complex = shared.by_name("istore32_complex"); + let istore8 = shared.by_name("istore8"); + let istore8_complex = shared.by_name("istore8_complex"); + let isub = shared.by_name("isub"); + let jump = shared.by_name("jump"); + let jump_table_base = shared.by_name("jump_table_base"); + let jump_table_entry = shared.by_name("jump_table_entry"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let nearest = shared.by_name("nearest"); + let popcnt = shared.by_name("popcnt"); + let regfill = 
shared.by_name("regfill"); + let regmove = shared.by_name("regmove"); + let regspill = shared.by_name("regspill"); + let return_ = shared.by_name("return"); + let rotl = shared.by_name("rotl"); + let rotl_imm = shared.by_name("rotl_imm"); + let rotr = shared.by_name("rotr"); + let rotr_imm = shared.by_name("rotr_imm"); + let selectif = shared.by_name("selectif"); + let sextend = shared.by_name("sextend"); + let sload16 = shared.by_name("sload16"); + let sload16_complex = shared.by_name("sload16_complex"); + let sload32 = shared.by_name("sload32"); + let sload32_complex = shared.by_name("sload32_complex"); + let sload8 = shared.by_name("sload8"); + let sload8_complex = shared.by_name("sload8_complex"); + let spill = shared.by_name("spill"); + let sqrt = shared.by_name("sqrt"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let stack_addr = shared.by_name("stack_addr"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + let symbol_value = shared.by_name("symbol_value"); + let trap = shared.by_name("trap"); + let trapff = shared.by_name("trapff"); + let trapif = shared.by_name("trapif"); + let trueff = shared.by_name("trueff"); + let trueif = shared.by_name("trueif"); + let trunc = shared.by_name("trunc"); + let uextend = shared.by_name("uextend"); + let uload16 = shared.by_name("uload16"); + let uload16_complex = shared.by_name("uload16_complex"); + let uload32 = shared.by_name("uload32"); + let uload32_complex = shared.by_name("uload32_complex"); + let uload8 = shared.by_name("uload8"); + let uload8_complex = shared.by_name("uload8_complex"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let x86_bsf = x86.by_name("x86_bsf"); + let x86_bsr = x86.by_name("x86_bsr"); + let x86_cvtt2si = x86.by_name("x86_cvtt2si"); + let x86_fmax = x86.by_name("x86_fmax"); + let x86_fmin = x86.by_name("x86_fmin"); + let x86_pop = x86.by_name("x86_pop"); + let x86_push 
= x86.by_name("x86_push"); + let x86_sdivmodx = x86.by_name("x86_sdivmodx"); + let x86_smulx = x86.by_name("x86_smulx"); + let x86_udivmodx = x86.by_name("x86_udivmodx"); + let x86_umulx = x86.by_name("x86_umulx"); + + // Shorthands for recipes. + let rec_adjustsp = r.template("adjustsp"); + let rec_adjustsp_ib = r.template("adjustsp_ib"); + let rec_adjustsp_id = r.template("adjustsp_id"); + let rec_allones_fnaddr4 = r.template("allones_fnaddr4"); + let rec_allones_fnaddr8 = r.template("allones_fnaddr8"); + let rec_brfb = r.template("brfb"); + let rec_brfd = r.template("brfd"); + let rec_brib = r.template("brib"); + let rec_brid = r.template("brid"); + let rec_bsf_and_bsr = r.template("bsf_and_bsr"); + let rec_call_id = r.template("call_id"); + let rec_call_plt_id = r.template("call_plt_id"); + let rec_call_r = r.template("call_r"); + let rec_cmov = r.template("cmov"); + let rec_copysp = r.template("copysp"); + let rec_div = r.template("div"); + let rec_debugtrap = r.recipe("debugtrap"); + let rec_f32imm_z = r.template("f32imm_z"); + let rec_f64imm_z = r.template("f64imm_z"); + let rec_fa = r.template("fa"); + let rec_fax = r.template("fax"); + let rec_fcmp = r.template("fcmp"); + let rec_fcscc = r.template("fcscc"); + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_fillSib32 = r.template("fillSib32"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fldWithIndex = r.template("fldWithIndex"); + let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); + let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); + let rec_fnaddr4 = r.template("fnaddr4"); + let rec_fnaddr8 = r.template("fnaddr8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let 
rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_fstWithIndex = r.template("fstWithIndex"); + let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); + let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); + let rec_furm = r.template("furm"); + let rec_furmi_rnd = r.template("furmi_rnd"); + let rec_got_fnaddr8 = r.template("got_fnaddr8"); + let rec_got_gvaddr8 = r.template("got_gvaddr8"); + let rec_gvaddr4 = r.template("gvaddr4"); + let rec_gvaddr8 = r.template("gvaddr8"); + let rec_icscc = r.template("icscc"); + let rec_icscc_ib = r.template("icscc_ib"); + let rec_icscc_id = r.template("icscc_id"); + let rec_indirect_jmp = r.template("indirect_jmp"); + let rec_jmpb = r.template("jmpb"); + let rec_jmpd = r.template("jmpd"); + let rec_jt_base = r.template("jt_base"); + let rec_jt_entry = r.template("jt_entry"); + let rec_ld = r.template("ld"); + let rec_ldDisp32 = r.template("ldDisp32"); + let rec_ldDisp8 = r.template("ldDisp8"); + let rec_ldWithIndex = r.template("ldWithIndex"); + let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); + let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); + let rec_mulx = r.template("mulx"); + let rec_null = r.recipe("null"); + let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8"); + let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8"); + let rec_popq = r.template("popq"); + let rec_pu_id = r.template("pu_id"); + let rec_pu_id_bool = r.template("pu_id_bool"); + let rec_pu_iq = r.template("pu_iq"); + let rec_pushq = r.template("pushq"); + let rec_ret = r.template("ret"); + let rec_r_ib = r.template("r_ib"); + let rec_r_id = r.template("r_id"); + let rec_rcmp = r.template("rcmp"); + let rec_rcmp_ib = r.template("rcmp_ib"); + let rec_rcmp_id = r.template("rcmp_id"); + let rec_rcmp_sp = r.template("rcmp_sp"); + let rec_regfill32 = r.template("regfill32"); + let rec_regspill32 = r.template("regspill32"); + let rec_rc = r.template("rc"); + let rec_rfumr = 
r.template("rfumr"); + let rec_rfurm = r.template("rfurm"); + let rec_rmov = r.template("rmov"); + let rec_rr = r.template("rr"); + let rec_rrx = r.template("rrx"); + let rec_setf_abcd = r.template("setf_abcd"); + let rec_seti_abcd = r.template("seti_abcd"); + let rec_spaddr4_id = r.template("spaddr4_id"); + let rec_spaddr8_id = r.template("spaddr8_id"); + let rec_spillSib32 = r.template("spillSib32"); + let rec_st = r.template("st"); + let rec_stacknull = r.recipe("stacknull"); + let rec_stDisp32 = r.template("stDisp32"); + let rec_stDisp32_abcd = r.template("stDisp32_abcd"); + let rec_stDisp8 = r.template("stDisp8"); + let rec_stDisp8_abcd = r.template("stDisp8_abcd"); + let rec_stWithIndex = r.template("stWithIndex"); + let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); + let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); + let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); + let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); + let rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); + let rec_st_abcd = r.template("st_abcd"); + let rec_t8jccb_abcd = r.template("t8jccb_abcd"); + let rec_t8jccd_abcd = r.template("t8jccd_abcd"); + let rec_t8jccd_long = r.template("t8jccd_long"); + let rec_tjccb = r.template("tjccb"); + let rec_tjccd = r.template("tjccd"); + let rec_trap = r.template("trap"); + let rec_trapif = r.recipe("trapif"); + let rec_trapff = r.recipe("trapff"); + let rec_u_id = r.template("u_id"); + let rec_umr = r.template("umr"); + let rec_ur = r.template("ur"); + let rec_urm = r.template("urm"); + let rec_urm_noflags = r.template("urm_noflags"); + let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); + + // Predicates shorthands. 
+ let all_ones_funcaddrs_and_not_is_pic = + settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); + let is_pic = settings.predicate_by_name("is_pic"); + let not_all_ones_funcaddrs_and_not_is_pic = + settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); + let not_is_pic = settings.predicate_by_name("not_is_pic"); + let use_popcnt = settings.predicate_by_name("use_popcnt"); + let use_lzcnt = settings.predicate_by_name("use_lzcnt"); + let use_bmi1 = settings.predicate_by_name("use_bmi1"); + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Definitions. + let mut e = PerCpuModeEncodings::new(); + + e.enc_i32_i64(iadd, rec_rr.opcodes(vec![0x01])); + e.enc_i32_i64(isub, rec_rr.opcodes(vec![0x29])); + e.enc_i32_i64(band, rec_rr.opcodes(vec![0x21])); + e.enc_i32_i64(bor, rec_rr.opcodes(vec![0x09])); + e.enc_i32_i64(bxor, rec_rr.opcodes(vec![0x31])); + + // x86 has a bitwise not instruction NOT. + e.enc_i32_i64(bnot, rec_ur.opcodes(vec![0xf7]).rrr(2)); + + // Also add a `b1` encodings for the logic instructions. + // TODO: Should this be done with 8-bit instructions? It would improve partial register + // dependencies. 
+ e.enc_both(band.bind(B1), rec_rr.opcodes(vec![0x21])); + e.enc_both(bor.bind(B1), rec_rr.opcodes(vec![0x09])); + e.enc_both(bxor.bind(B1), rec_rr.opcodes(vec![0x31])); + + e.enc_i32_i64(imul, rec_rrx.opcodes(vec![0x0f, 0xaf])); + e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(vec![0xf7]).rrr(7)); + e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(vec![0xf7]).rrr(6)); + + e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(vec![0xf7]).rrr(5)); + e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(vec![0xf7]).rrr(4)); + + e.enc_i32_i64(copy, rec_umr.opcodes(vec![0x89])); + e.enc_both(copy.bind(B1), rec_umr.opcodes(vec![0x89])); + e.enc_both(copy.bind(I8), rec_umr.opcodes(vec![0x89])); + e.enc_both(copy.bind(I16), rec_umr.opcodes(vec![0x89])); + + // TODO For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. + for &ty in &[I8, I16, I32] { + e.enc32(regmove.bind(ty), rec_rmov.opcodes(vec![0x89])); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(vec![0x89]).rex()); + } + e.enc64(regmove.bind(I64), rec_rmov.opcodes(vec![0x89]).rex().w()); + e.enc_both(regmove.bind(B1), rec_rmov.opcodes(vec![0x89])); + e.enc_both(regmove.bind(I8), rec_rmov.opcodes(vec![0x89])); + + e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(vec![0x83]).rrr(0)); + e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(vec![0x81]).rrr(0)); + + e.enc_i32_i64(band_imm, rec_r_ib.opcodes(vec![0x83]).rrr(4)); + e.enc_i32_i64(band_imm, rec_r_id.opcodes(vec![0x81]).rrr(4)); + + e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(vec![0x83]).rrr(1)); + e.enc_i32_i64(bor_imm, rec_r_id.opcodes(vec![0x81]).rrr(1)); + + e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(vec![0x83]).rrr(6)); + e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(vec![0x81]).rrr(6)); + + // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can + // even use the single-byte immediate for 0xffff_ffXX masks. + + // Immediate constants. 
+ e.enc32(iconst.bind(I32), rec_pu_id.opcodes(vec![0xb8])); + + e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(vec![0xb8])); + e.enc64(iconst.bind(I32), rec_pu_id.opcodes(vec![0xb8])); + + // The 32-bit immediate movl also zero-extends to 64 bits. + let f_unary_imm = formats.get(formats.by_name("UnaryImm")); + let is_unsigned_int32 = InstructionPredicate::new_is_unsigned_int(f_unary_imm, "imm", 32, 0); + + e.enc64_func( + iconst.bind(I64), + rec_pu_id.opcodes(vec![0xb8]).rex(), + |encoding| encoding.inst_predicate(is_unsigned_int32.clone()), + ); + e.enc64_func( + iconst.bind(I64), + rec_pu_id.opcodes(vec![0xb8]), + |encoding| encoding.inst_predicate(is_unsigned_int32), + ); + + // Sign-extended 32-bit immediate. + e.enc64( + iconst.bind(I64), + rec_u_id.rex().opcodes(vec![0xc7]).rrr(0).w(), + ); + + // Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. + e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(vec![0xb8]).rex().w()); + + // Bool constants. + e.enc_both(bconst.bind(B1), rec_pu_id_bool.opcodes(vec![0xb8])); + + // Shifts and rotates. + // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit + // and 16-bit shifts would need explicit masking. + + for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] { + // Cannot use enc_i32_i64 for this pattern because instructions require + // to bind any. + e.enc32( + inst.bind(I32).bind_any(), + rec_rc.opcodes(vec![0xd3]).rrr(rrr), + ); + e.enc64( + inst.bind(I64).bind_any(), + rec_rc.opcodes(vec![0xd3]).rrr(rrr).rex().w(), + ); + e.enc64( + inst.bind(I32).bind_any(), + rec_rc.opcodes(vec![0xd3]).rrr(rrr).rex(), + ); + e.enc64( + inst.bind(I32).bind_any(), + rec_rc.opcodes(vec![0xd3]).rrr(rrr), + ); + } + + for &(inst, rrr) in &[ + (rotl_imm, 0), + (rotr_imm, 1), + (ishl_imm, 4), + (ushr_imm, 5), + (sshr_imm, 7), + ] { + e.enc_i32_i64(inst, rec_r_ib.opcodes(vec![0xc1]).rrr(rrr)); + } + + // Population count. 
+ e.enc32_isap( + popcnt.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]), + use_popcnt, + ); + e.enc64_isap( + popcnt.bind(I64), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]).rex().w(), + use_popcnt, + ); + e.enc64_isap( + popcnt.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]).rex(), + use_popcnt, + ); + e.enc64_isap( + popcnt.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]), + use_popcnt, + ); + + // Count leading zero bits. + e.enc32_isap( + clz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]), + use_lzcnt, + ); + e.enc64_isap( + clz.bind(I64), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]).rex().w(), + use_lzcnt, + ); + e.enc64_isap( + clz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]).rex(), + use_lzcnt, + ); + e.enc64_isap( + clz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]), + use_lzcnt, + ); + + // Count trailing zero bits. + e.enc32_isap( + ctz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]), + use_bmi1, + ); + e.enc64_isap( + ctz.bind(I64), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]).rex().w(), + use_bmi1, + ); + e.enc64_isap( + ctz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]).rex(), + use_bmi1, + ); + e.enc64_isap( + ctz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]), + use_bmi1, + ); + + // Loads and stores. 
+ let f_load_complex = formats.get(formats.by_name("LoadComplex")); + let is_load_complex_length_two = InstructionPredicate::new_length_equals(f_load_complex, 2); + + for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] { + e.enc_i32_i64_instp( + load_complex, + recipe.opcodes(vec![0x8b]), + is_load_complex_length_two.clone(), + ); + e.enc_x86_64_instp( + uload32_complex, + recipe.opcodes(vec![0x8b]), + is_load_complex_length_two.clone(), + ); + + e.enc64_instp( + sload32_complex, + recipe.opcodes(vec![0x63]).rex().w(), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload16_complex, + recipe.opcodes(vec![0x0f, 0xb7]), + is_load_complex_length_two.clone(), + ); + e.enc_i32_i64_instp( + sload16_complex, + recipe.opcodes(vec![0x0f, 0xbf]), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload8_complex, + recipe.opcodes(vec![0x0f, 0xb6]), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + sload8_complex, + recipe.opcodes(vec![0x0f, 0xbe]), + is_load_complex_length_two.clone(), + ); + } + + let f_store_complex = formats.get(formats.by_name("StoreComplex")); + let is_store_complex_length_three = InstructionPredicate::new_length_equals(f_store_complex, 3); + + for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] { + e.enc_i32_i64_instp( + store_complex, + recipe.opcodes(vec![0x89]), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore32_complex, + recipe.opcodes(vec![0x89]), + is_store_complex_length_three.clone(), + ); + e.enc_both_instp( + istore16_complex.bind(I32), + recipe.opcodes(vec![0x66, 0x89]), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore16_complex.bind(I64), + recipe.opcodes(vec![0x66, 0x89]), + is_store_complex_length_three.clone(), + ); + } + + for recipe in &[ + rec_stWithIndex_abcd, + rec_stWithIndexDisp8_abcd, + rec_stWithIndexDisp32_abcd, + ] { + e.enc_both_instp( + 
istore8_complex.bind(I32), + recipe.opcodes(vec![0x88]), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore8_complex.bind(I64), + recipe.opcodes(vec![0x88]), + is_store_complex_length_three.clone(), + ); + } + + for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] { + e.enc_i32_i64_ld_st(store, true, recipe.opcodes(vec![0x89])); + e.enc_x86_64(istore32.bind(I64).bind_any(), recipe.opcodes(vec![0x89])); + e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(vec![0x66, 0x89])); + } + + // Byte stores are more complicated because the registers they can address + // depends of the presence of a REX prefix. The st*_abcd recipes fall back to + // the corresponding st* recipes when a REX prefix is applied. + + for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] { + e.enc_both(istore8.bind(I32).bind_any(), recipe.opcodes(vec![0x88])); + e.enc_x86_64(istore8.bind(I64).bind_any(), recipe.opcodes(vec![0x88])); + } + + e.enc_i32_i64(spill, rec_spillSib32.opcodes(vec![0x89])); + e.enc_i32_i64(regspill, rec_regspill32.opcodes(vec![0x89])); + + // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid + // constraining the permitted registers. + // See MIN_SPILL_SLOT_SIZE which makes this safe. 
+ + e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(vec![0x89])); + e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(vec![0x89])); + for &ty in &[I8, I16] { + e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(vec![0x89])); + e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(vec![0x89])); + } + + for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { + e.enc_i32_i64_ld_st(load, true, recipe.opcodes(vec![0x8b])); + e.enc_x86_64(uload32.bind(I64), recipe.opcodes(vec![0x8b])); + e.enc64(sload32.bind(I64), recipe.opcodes(vec![0x63]).rex().w()); + e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(vec![0x0f, 0xb7])); + e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(vec![0x0f, 0xbf])); + e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(vec![0x0f, 0xb6])); + e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(vec![0x0f, 0xbe])); + } + + e.enc_i32_i64(fill, rec_fillSib32.opcodes(vec![0x8b])); + e.enc_i32_i64(regfill, rec_regfill32.opcodes(vec![0x8b])); + + // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. + + e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(vec![0x8b])); + e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(vec![0x8b])); + for &ty in &[I8, I16] { + e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(vec![0x8b])); + e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(vec![0x8b])); + } + + // Push and Pop. + e.enc32(x86_push.bind(I32), rec_pushq.opcodes(vec![0x50])); + e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(vec![0x50])); + + e.enc32(x86_pop.bind(I32), rec_popq.opcodes(vec![0x58])); + e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(vec![0x58])); + + // Copy Special + // For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. 
+ e.enc64(copy_special, rec_copysp.opcodes(vec![0x89]).rex().w()); + e.enc32(copy_special, rec_copysp.opcodes(vec![0x89])); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. + // The same encoding is generated for both the 64- and 32-bit architectures. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + for &ty in &[F64, F32] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + + // Adjust SP down by a dynamic value (or up, with a negative operand). + e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(vec![0x29])); + e.enc64( + adjust_sp_down.bind(I64), + rec_adjustsp.opcodes(vec![0x29]).rex().w(), + ); + + // Adjust SP up by an immediate (or down, with a negative immediate). + e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(vec![0x83])); + e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(vec![0x81])); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_ib.opcodes(vec![0x83]).rex().w(), + ); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_id.opcodes(vec![0x81]).rex().w(), + ); + + // Adjust SP down by an immediate (or up, with a negative immediate). + e.enc32( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(vec![0x83]).rrr(5), + ); + e.enc32( + adjust_sp_down_imm, + rec_adjustsp_id.opcodes(vec![0x81]).rrr(5), + ); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(vec![0x83]).rrr(5).rex().w(), + ); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_id.opcodes(vec![0x81]).rrr(5).rex().w(), + ); + + // Float loads and stores. 
+ e.enc_both( + load.bind(F32).bind_any(), + rec_fld.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F32).bind_any(), + rec_fldDisp8.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F32).bind_any(), + rec_fldDisp32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndex.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp8.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + + e.enc_both( + load.bind(F64).bind_any(), + rec_fld.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F64).bind_any(), + rec_fldDisp8.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F64).bind_any(), + rec_fldDisp32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndex.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp8.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + + e.enc_both( + store.bind(F32).bind_any(), + rec_fst.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F32).bind_any(), + rec_fstDisp8.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F32).bind_any(), + rec_fstDisp32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndex.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp8.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + + e.enc_both( + store.bind(F64).bind_any(), + rec_fst.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F64).bind_any(), + rec_fstDisp8.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + 
store.bind(F64).bind_any(), + rec_fstDisp32.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndex.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndexDisp8.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndexDisp32.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + + e.enc_both( + fill.bind(F32), + rec_ffillSib32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + regfill.bind(F32), + rec_fregfill32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + fill.bind(F64), + rec_ffillSib32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + regfill.bind(F64), + rec_fregfill32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + + e.enc_both( + spill.bind(F32), + rec_fspillSib32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + regspill.bind(F32), + rec_fregspill32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + spill.bind(F64), + rec_fspillSib32.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + regspill.bind(F64), + rec_fregspill32.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + + // Function addresses. + + // Non-PIC, all-ones funcaddresses. + e.enc32_isap( + func_addr.bind(I32), + rec_fnaddr4.opcodes(vec![0xb8]), + not_all_ones_funcaddrs_and_not_is_pic, + ); + e.enc64_isap( + func_addr.bind(I64), + rec_fnaddr8.opcodes(vec![0xb8]).rex().w(), + not_all_ones_funcaddrs_and_not_is_pic, + ); + + // Non-PIC, all-zeros funcaddresses. + e.enc32_isap( + func_addr.bind(I32), + rec_allones_fnaddr4.opcodes(vec![0xb8]), + all_ones_funcaddrs_and_not_is_pic, + ); + e.enc64_isap( + func_addr.bind(I64), + rec_allones_fnaddr8.opcodes(vec![0xb8]).rex().w(), + all_ones_funcaddrs_and_not_is_pic, + ); + + // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field. 
+ let f_func_addr = formats.get(formats.by_name("FuncAddr")); + let is_colocated_func = InstructionPredicate::new_is_colocated_func(f_func_addr, "func_ref"); + e.enc64_instp( + func_addr.bind(I64), + rec_pcrel_fnaddr8.opcodes(vec![0x8d]).rex().w(), + is_colocated_func, + ); + + // 64-bit, non-colocated, PIC. + e.enc64_isap( + func_addr.bind(I64), + rec_got_fnaddr8.opcodes(vec![0x8b]).rex().w(), + is_pic, + ); + + // Global addresses. + + // Non-PIC. + e.enc32_isap( + symbol_value.bind(I32), + rec_gvaddr4.opcodes(vec![0xb8]), + not_is_pic, + ); + e.enc64_isap( + symbol_value.bind(I64), + rec_gvaddr8.opcodes(vec![0xb8]).rex().w(), + not_is_pic, + ); + + // PIC, colocated. + e.enc64_func( + symbol_value.bind(I64), + rec_pcrel_gvaddr8.opcodes(vec![0x8d]).rex().w(), + |encoding| { + encoding + .isa_predicate(is_pic) + .inst_predicate(InstructionPredicate::new_is_colocated_data(formats)) + }, + ); + + // PIC, non-colocated. + e.enc64_isap( + symbol_value.bind(I64), + rec_got_gvaddr8.opcodes(vec![0x8b]).rex().w(), + is_pic, + ); + + // Stack addresses. + // + // TODO: Add encoding rules for stack_load and stack_store, so that they + // don't get legalized to stack_addr + load/store. + e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(vec![0x8d])); + e.enc64( + stack_addr.bind(I64), + rec_spaddr8_id.opcodes(vec![0x8d]).rex().w(), + ); + + // Call/return + + // 32-bit, both PIC and non-PIC. + e.enc32(call, rec_call_id.opcodes(vec![0xe8])); + + // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. + let f_call = formats.get(formats.by_name("Call")); + let is_colocated_func = InstructionPredicate::new_is_colocated_func(f_call, "func_ref"); + e.enc64_instp(call, rec_call_id.opcodes(vec![0xe8]), is_colocated_func); + + // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC + // is currently using the large model, which requires calls be lowered to + // func_addr+call_indirect. 
+ e.enc64_isap(call, rec_call_plt_id.opcodes(vec![0xe8]), is_pic); + + e.enc32( + call_indirect.bind(I32), + rec_call_r.opcodes(vec![0xff]).rrr(2), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(vec![0xff]).rrr(2).rex(), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(vec![0xff]).rrr(2), + ); + + e.enc32(return_, rec_ret.opcodes(vec![0xc3])); + e.enc64(return_, rec_ret.opcodes(vec![0xc3])); + + // Branches. + e.enc32(jump, rec_jmpb.opcodes(vec![0xeb])); + e.enc64(jump, rec_jmpb.opcodes(vec![0xeb])); + e.enc32(jump, rec_jmpd.opcodes(vec![0xe9])); + e.enc64(jump, rec_jmpd.opcodes(vec![0xe9])); + + e.enc_both(brif, rec_brib.opcodes(vec![0x70])); + e.enc_both(brif, rec_brid.opcodes(vec![0x0f, 0x80])); + + // Not all float condition codes are legal, see `supported_floatccs`. + e.enc_both(brff, rec_brfb.opcodes(vec![0x70])); + e.enc_both(brff, rec_brfd.opcodes(vec![0x0f, 0x80])); + + // Note that the tjccd opcode will be prefixed with 0x0f. + e.enc_i32_i64(brz, rec_tjccb.opcodes(vec![0x74])); + e.enc_i32_i64(brz, rec_tjccd.opcodes(vec![0x84])); + e.enc_i32_i64(brnz, rec_tjccb.opcodes(vec![0x75])); + e.enc_i32_i64(brnz, rec_tjccd.opcodes(vec![0x85])); + + // Branch on a b1 value in a register only looks at the low 8 bits. See also + // bint encodings below. + // + // Start with the worst-case encoding for X86_32 only. The register allocator + // can't handle a branch with an ABCD-constrained operand. + e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(vec![0x84])); + e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(vec![0x85])); + + e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(vec![0x74])); + e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(vec![0x84])); + e.enc_both(brnz.bind(B1), rec_t8jccb_abcd.opcodes(vec![0x75])); + e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(vec![0x85])); + + // Jump tables. 
+ e.enc64( + jump_table_entry.bind(I64).bind_any().bind_any(), + rec_jt_entry.opcodes(vec![0x63]).rex().w(), + ); + e.enc32( + jump_table_entry.bind(I32).bind_any().bind_any(), + rec_jt_entry.opcodes(vec![0x8b]), + ); + + e.enc64( + jump_table_base.bind(I64), + rec_jt_base.opcodes(vec![0x8d]).rex().w(), + ); + e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(vec![0x8d])); + + e.enc_x86_64( + indirect_jump_table_br.bind(I64), + rec_indirect_jmp.opcodes(vec![0xff]).rrr(4), + ); + e.enc32( + indirect_jump_table_br.bind(I32), + rec_indirect_jmp.opcodes(vec![0xff]).rrr(4), + ); + + // Trap as ud2 + e.enc32(trap, rec_trap.opcodes(vec![0x0f, 0x0b])); + e.enc64(trap, rec_trap.opcodes(vec![0x0f, 0x0b])); + + // Debug trap as int3 + e.enc32_rec(debugtrap, rec_debugtrap, 0); + e.enc64_rec(debugtrap, rec_debugtrap, 0); + + e.enc32_rec(trapif, rec_trapif, 0); + e.enc64_rec(trapif, rec_trapif, 0); + e.enc32_rec(trapff, rec_trapff, 0); + e.enc64_rec(trapff, rec_trapff, 0); + + // Comparisons + e.enc_i32_i64(icmp, rec_icscc.opcodes(vec![0x39])); + e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(vec![0x83]).rrr(7)); + e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(vec![0x81]).rrr(7)); + e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(vec![0x39])); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(vec![0x83]).rrr(7)); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(vec![0x81]).rrr(7)); + // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). + + e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(vec![0x39])); + e.enc64( + ifcmp_sp.bind(I64), + rec_rcmp_sp.opcodes(vec![0x39]).rex().w(), + ); + + // Convert flags to bool. + // This encodes `b1` as an 8-bit low register with the value 0 or 1. + e.enc_both(trueif, rec_seti_abcd.opcodes(vec![0x0f, 0x90])); + e.enc_both(trueff, rec_setf_abcd.opcodes(vec![0x0f, 0x90])); + + // Conditional move (a.k.a integer select). 
+ e.enc_i32_i64(selectif, rec_cmov.opcodes(vec![0x0f, 0x40])); + + // Bit scan forwards and reverse + e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(vec![0x0f, 0xbc])); + e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(vec![0x0f, 0xbd])); + + // Convert bool to int. + // + // This assumes that b1 is represented as an 8-bit low register with the value 0 + // or 1. + // + // Encode movzbq as movzbl, because it's equivalent and shorter. + e.enc32( + bint.bind(I32).bind(B1), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + e.enc64( + bint.bind(I64).bind(B1), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + bint.bind(I64).bind(B1), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + e.enc64( + bint.bind(I32).bind(B1), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + bint.bind(I32).bind(B1), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + // Numerical conversions. + + // Reducing an integer is a no-op. + e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + + e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); + + // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending + // instructions for %al/%ax/%eax to %ax/%eax/%rax. 
+ + // movsbl + e.enc32( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xbe]), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xbe]).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xbe]), + ); + + // movswl + e.enc32( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]), + ); + + // movsbq + e.enc64( + sextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xbe]).rex().w(), + ); + + // movswq + e.enc64( + sextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]).rex().w(), + ); + + // movslq + e.enc64( + sextend.bind(I64).bind(I32), + rec_urm_noflags.opcodes(vec![0x63]).rex().w(), + ); + + // movzbl + e.enc32( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + // movzwl + e.enc32( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + ); + + // movzbq, encoded as movzbl because it's equivalent and shorter. 
+ e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + // movzwq, encoded as movzwl because it's equivalent and shorter + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + ); + + // A 32-bit register copy clears the high 32 bits. + e.enc64( + uextend.bind(I64).bind(I32), + rec_umr.opcodes(vec![0x89]).rex(), + ); + e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(vec![0x89])); + + // Floating point + + // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for + // 32-bit and 64-bit floats respectively. + let f_unary_ieee32 = formats.get(formats.by_name("UnaryIeee32")); + let is_zero_32_bit_float = InstructionPredicate::new_is_zero_32bit_float(f_unary_ieee32, "imm"); + e.enc32_instp( + f32const, + rec_f32imm_z.opcodes(vec![0x0f, 0x57]), + is_zero_32_bit_float.clone(), + ); + + let f_unary_ieee64 = formats.get(formats.by_name("UnaryIeee64")); + let is_zero_64_bit_float = InstructionPredicate::new_is_zero_64bit_float(f_unary_ieee64, "imm"); + e.enc32_instp( + f64const, + rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]), + is_zero_64_bit_float.clone(), + ); + + e.enc_x86_64_instp( + f32const, + rec_f32imm_z.opcodes(vec![0x0f, 0x57]), + is_zero_32_bit_float, + ); + e.enc_x86_64_instp( + f64const, + rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]), + is_zero_64_bit_float, + ); + + // movd + e.enc_both( + bitcast.bind(F32).bind(I32), + rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]), + ); + e.enc_both( + bitcast.bind(I32).bind(F32), + rec_rfumr.opcodes(vec![0x66, 0x0f, 0x7e]), + ); + + // movq + e.enc64( + bitcast.bind(F64).bind(I64), + rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]).rex().w(), + ); + e.enc64( + bitcast.bind(I64).bind(F64), + rec_rfumr.opcodes(vec![0x66, 0x0f, 
0x7e]).rex().w(), + ); + + // movaps + e.enc_both(copy.bind(F32), rec_furm.opcodes(vec![0x0f, 0x28])); + e.enc_both(copy.bind(F64), rec_furm.opcodes(vec![0x0f, 0x28])); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F32), rec_frmov.opcodes(vec![0x0f, 0x28])); + e.enc64(regmove.bind(F32), rec_frmov.opcodes(vec![0x0f, 0x28]).rex()); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F64), rec_frmov.opcodes(vec![0x0f, 0x28])); + e.enc64(regmove.bind(F64), rec_frmov.opcodes(vec![0x0f, 0x28]).rex()); + + // cvtsi2ss + e.enc_i32_i64( + fcvt_from_sint.bind(F32), + rec_frurm.opcodes(vec![0xf3, 0x0f, 0x2a]), + ); + + // cvtsi2sd + e.enc_i32_i64( + fcvt_from_sint.bind(F64), + rec_frurm.opcodes(vec![0xf2, 0x0f, 0x2a]), + ); + + // cvtss2sd + e.enc_both( + fpromote.bind(F64).bind(F32), + rec_furm.opcodes(vec![0xf3, 0x0f, 0x5a]), + ); + + // cvtsd2ss + e.enc_both( + fdemote.bind(F32).bind(F64), + rec_furm.opcodes(vec![0xf2, 0x0f, 0x5a]), + ); + + // cvttss2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F32), + rec_rfurm.opcodes(vec![0xf3, 0x0f, 0x2c]), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F32), + rec_rfurm.opcodes(vec![0xf3, 0x0f, 0x2c]).rex().w(), + ); + + // cvttsd2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F64), + rec_rfurm.opcodes(vec![0xf2, 0x0f, 0x2c]), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F64), + rec_rfurm.opcodes(vec![0xf2, 0x0f, 0x2c]).rex().w(), + ); + + // Exact square roots. + e.enc_both(sqrt.bind(F32), rec_furm.opcodes(vec![0xf3, 0x0f, 0x51])); + e.enc_both(sqrt.bind(F64), rec_furm.opcodes(vec![0xf2, 0x0f, 0x51])); + + // Rounding. The recipe looks at the opcode to pick an immediate. 
+ for inst in &[nearest, floor, ceil, trunc] { + e.enc_both_isap( + inst.bind(F32), + rec_furmi_rnd.opcodes(vec![0x66, 0x0f, 0x3a, 0x0a]), + use_sse41, + ); + e.enc_both_isap( + inst.bind(F64), + rec_furmi_rnd.opcodes(vec![0x66, 0x0f, 0x3a, 0x0b]), + use_sse41, + ); + } + + // Binary arithmetic ops. + for &(inst, opc) in &[ + (fadd, 0x58), + (fsub, 0x5c), + (fmul, 0x59), + (fdiv, 0x5e), + (x86_fmin, 0x5d), + (x86_fmax, 0x5f), + ] { + e.enc_both(inst.bind(F32), rec_fa.opcodes(vec![0xf3, 0x0f, opc])); + e.enc_both(inst.bind(F64), rec_fa.opcodes(vec![0xf2, 0x0f, opc])); + } + + // Binary bitwise ops. + for &(inst, opc) in &[(band, 0x54), (bor, 0x56), (bxor, 0x57)] { + e.enc_both(inst.bind(F32), rec_fa.opcodes(vec![0x0f, opc])); + e.enc_both(inst.bind(F64), rec_fa.opcodes(vec![0x0f, opc])); + } + + // The `andnps(x,y)` instruction computes `~x&y`, while band_not(x,y)` is `x&~y. + e.enc_both(band_not.bind(F32), rec_fax.opcodes(vec![0x0f, 0x55])); + e.enc_both(band_not.bind(F64), rec_fax.opcodes(vec![0x0f, 0x55])); + + // Comparisons. + // + // This only covers the condition codes in `supported_floatccs`, the rest are + // handled by legalization patterns. 
+ e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(vec![0x0f, 0x2e])); + e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(vec![0x66, 0x0f, 0x2e])); + e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(vec![0x0f, 0x2e])); + e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(vec![0x66, 0x0f, 0x2e])); + + e +} diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs index 5407ec2e87..a8002890f7 100644 --- a/cranelift/codegen/meta/src/isa/x86/mod.rs +++ b/cranelift/codegen/meta/src/isa/x86/mod.rs @@ -1,15 +1,15 @@ use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::instructions::InstructionPredicateMap; use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::Recipes; use crate::shared::types::Bool::B1; use crate::shared::types::Float::{F32, F64}; use crate::shared::types::Int::{I16, I32, I64, I8}; use crate::shared::Definitions as SharedDefinitions; +mod encodings; mod instructions; mod legalize; +mod recipes; mod registers; mod settings; @@ -51,12 +51,17 @@ pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { x86_64.legalize_type(F32, x86_expand); x86_64.legalize_type(F64, x86_expand); + let recipes = recipes::define(shared_defs, &settings, ®s); + + let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes); + x86_32.set_encodings(encodings.enc32); + x86_64.set_encodings(encodings.enc64); + let encodings_predicates = encodings.inst_pred_reg.extract(); + + let recipes = encodings.recipes; + let cpu_modes = vec![x86_64, x86_32]; - let recipes = Recipes::new(); - - let encodings_predicates = InstructionPredicateMap::new(); - TargetIsa::new( "x86", inst_group, diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs new file mode 100644 index 0000000000..5c1d28ef20 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -0,0 +1,2805 @@ +use std::rc::Rc; + +use crate::cdsl::ast::Literal; +use crate::cdsl::formats::{FormatRegistry, 
InstructionFormat};
use crate::cdsl::instructions::InstructionPredicate;
use crate::cdsl::recipes::{
    EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack,
};
use crate::cdsl::regs::IsaRegs;
use crate::cdsl::settings::SettingGroup;
use crate::shared::Definitions as SharedDefinitions;

/// Helper data structure to create recipes and template recipes.
/// It contains all the recipes and recipe templates that might be used in the encodings crate of
/// this same directory.
pub struct RecipeGroup<'builder> {
    /// Memoized format pointer, to pass it to builders later.
    formats: &'builder FormatRegistry,

    /// Memoized registers description, to pass it to builders later.
    regs: &'builder IsaRegs,

    /// All the recipes explicitly created in this file. This is different from the final set of
    /// recipes, which is definitive only once encodings have generated new recipes on the fly.
    recipes: Vec<EncodingRecipe>,

    /// All the recipe templates created in this file.
    templates: Vec<Rc<Template<'builder>>>,
}

impl<'builder> RecipeGroup<'builder> {
    fn new(formats: &'builder FormatRegistry, regs: &'builder IsaRegs) -> Self {
        Self {
            formats,
            regs,
            recipes: Vec::new(),
            templates: Vec::new(),
        }
    }

    /// Build `recipe` against the format registry and record it in this group.
    fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) {
        self.recipes.push(recipe.build(self.formats));
    }

    /// Wrap `recipe` in a default (non-specialized) `Template` and record it.
    fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>> {
        self.add_template(Template::new(recipe, self.formats, self.regs))
    }

    /// Record an already-configured `Template` and hand back a shared handle to it.
    fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
        let template = Rc::new(template);
        self.templates.push(Rc::clone(&template));
        template
    }

    /// Look up a plain (non-template) recipe by name.
    ///
    /// # Panics
    /// Panics when no recipe of that name was registered (it may be a template instead).
    pub fn recipe(&self, name: &str) -> &EncodingRecipe {
        self.recipes
            .iter()
            .find(|recipe| recipe.name == name)
            .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name))
    }

    /// Look up a recipe template by name.
    ///
    /// # Panics
    /// Panics when no template of that name was registered (it may be a plain recipe instead).
    pub fn template(&self, name: &str) -> &Template {
        self.templates
            .iter()
            .find(|recipe| recipe.name() == name)
            .unwrap_or_else(|| panic!("unknown tail recipe name: {}. Try recipe?", name))
    }
}

// Opcode representation.
//
// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are
// variable length, so we use separate recipes for different styles of opcodes and prefixes. The
// opcode format is indicated by the recipe name prefix.
//
// The match case below does not include the REX prefix which goes after the mandatory prefix.
// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are
// represented by separate recipes.
//
// The encoding bits are:
//
// 0-7: The opcode byte <op>.
// 8-9: pp, mandatory prefix:
//    00: none (Op*)
//    01: 66   (Mp*)
//    10: F3   (Mp*)
//    11: F2   (Mp*)
// 10-11: mm, opcode map:
//    00: <op>        (Op1/Mp1)
//    01: 0F <op>     (Op2/Mp2)
//    10: 0F 38 <op>  (Op3/Mp3)
//    11: 0F 3A <op>  (Op3/Mp3)
// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes.
// 15: REX.W bit (or VEX.W/E)
//
// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and
// the pp+mm format is ready for supporting VEX prefixes.
//
// TODO Cranelift doesn't actually require recipe to have different encoding sizes anymore, so this
// could be simplified.

/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits.
+fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) { + assert!(op_bytes.len() >= 1, "at least one opcode byte"); + + let prefix_bytes = &op_bytes[..op_bytes.len() - 1]; + let (name, mmpp) = match prefix_bytes { + [] => ("Op1", 0b000), + [0x66] => ("Mp1", 0b0001), + [0xf3] => ("Mp1", 0b0010), + [0xf2] => ("Mp1", 0b0011), + [0x0f] => ("Op2", 0b0100), + [0x66, 0x0f] => ("Mp2", 0b0101), + [0xf3, 0x0f] => ("Mp2", 0b0110), + [0xf2, 0x0f] => ("Mp2", 0b0111), + [0x0f, 0x38] => ("Op3", 0b1000), + [0x66, 0x0f, 0x38] => ("Mp3", 0b1001), + [0xf3, 0x0f, 0x38] => ("Mp3", 0b1010), + [0xf2, 0x0f, 0x38] => ("Mp3", 0b1011), + [0x0f, 0x3a] => ("Op3", 0b1100), + [0x66, 0x0f, 0x3a] => ("Mp3", 0b1101), + [0xf3, 0x0f, 0x3a] => ("Mp3", 0b1110), + [0xf2, 0x0f, 0x3a] => ("Mp3", 0b1111), + _ => { + panic!("unexpected opcode sequence: {:?}", op_bytes); + } + }; + + let opcode_byte = op_bytes[op_bytes.len() - 1] as u16; + (name, opcode_byte | (mmpp << 8) | (rrr << 12) | w << 15) +} + +/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the +/// corresponding `put_*` function from the `binemit.rs` module. +fn replace_put_op(code: Option, prefix: &str) -> Option { + code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase()))) +} + +/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class. 
+fn replace_nonrex_constraints( + regs: &IsaRegs, + constraints: Vec, +) -> Vec { + constraints + .into_iter() + .map(|constraint| match constraint { + OperandConstraint::RegClass(rc_index) => { + let new_rc_index = if rc_index == regs.class_by_name("GPR") { + regs.class_by_name("GPR8") + } else if rc_index == regs.class_by_name("FPR") { + regs.class_by_name("FPR8") + } else { + rc_index + }; + OperandConstraint::RegClass(new_rc_index) + } + _ => constraint, + }) + .collect() +} + +/// Previously called a TailRecipe in the Python meta language, this allows to create multiple +/// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different +/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating +/// Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and could be +/// reconsidered later. +#[derive(Clone)] +pub struct Template<'builder> { + /// Mapping of format indexes to format data, used in the build() method. + formats: &'builder FormatRegistry, + + /// Description of registers, used in the build() method. + regs: &'builder IsaRegs, + + /// The recipe template, which is to be specialized (by copy). + recipe: EncodingRecipeBuilder, + + /// Does this recipe requires a REX prefix? + requires_prefix: bool, + + /// Other recipe to use when REX-prefixed. + when_prefixed: Option>>, + + // Specialized parameters. + /// Should we include the REX prefix? + rex: bool, + /// Value of the W bit (0 or 1). + w_bit: u16, + /// Value of the RRR bits (between 0 and 0b111). + rrr_bits: u16, + /// Opcode bytes. 
+ op_bytes: Vec, +} + +impl<'builder> Template<'builder> { + fn new( + recipe: EncodingRecipeBuilder, + formats: &'builder FormatRegistry, + regs: &'builder IsaRegs, + ) -> Self { + Self { + formats, + regs, + recipe, + requires_prefix: false, + when_prefixed: None, + rex: false, + w_bit: 0, + rrr_bits: 0, + op_bytes: Vec::new(), + } + } + + fn name(&self) -> &str { + &self.recipe.name + } + fn requires_prefix(self, value: bool) -> Self { + Self { + requires_prefix: value, + ..self + } + } + fn when_prefixed(self, template: Rc>) -> Self { + assert!(self.when_prefixed.is_none()); + Self { + when_prefixed: Some(template), + ..self + } + } + + // Copy setters. + pub fn opcodes(&self, op_bytes: Vec) -> Self { + assert!(!op_bytes.is_empty()); + let mut copy = self.clone(); + copy.op_bytes = op_bytes; + copy + } + pub fn w(&self) -> Self { + let mut copy = self.clone(); + copy.w_bit = 1; + copy + } + pub fn rrr(&self, value: u16) -> Self { + assert!(value <= 0b111); + let mut copy = self.clone(); + copy.rrr_bits = value; + copy + } + pub fn nonrex(&self) -> Self { + assert!(!self.requires_prefix, "Tail recipe requires REX prefix."); + let mut copy = self.clone(); + copy.rex = false; + copy + } + pub fn rex(&self) -> Self { + if let Some(prefixed) = &self.when_prefixed { + let mut ret = prefixed.rex(); + // Forward specialized parameters. + ret.op_bytes = self.op_bytes.clone(); + ret.w_bit = self.w_bit; + ret.rrr_bits = self.rrr_bits; + return ret; + } + let mut copy = self.clone(); + copy.rex = true; + copy + } + + pub fn build(mut self) -> (EncodingRecipe, u16) { + let (name, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); + + let (name, rex_prefix_size) = if self.rex { + ("Rex".to_string() + name, 1) + } else { + (name.into(), 0) + }; + + let size_addendum = self.op_bytes.len() as u64 + rex_prefix_size; + self.recipe.base_size += size_addendum; + + // Branch ranges are relative to the end of the instruction. 
+ self.recipe + .branch_range + .as_mut() + .map(|range| range.inst_size += size_addendum); + + self.recipe.emit = replace_put_op(self.recipe.emit, &name); + self.recipe.name = name + &self.recipe.name; + + if !self.rex { + let operands_in = self.recipe.operands_in.unwrap_or(Vec::new()); + self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); + let operands_out = self.recipe.operands_out.unwrap_or(Vec::new()); + self.recipe.operands_out = Some(replace_nonrex_constraints(self.regs, operands_out)); + } + + (self.recipe.build(self.formats), bits) + } +} + +/// Returns a predicate checking that the "cond" field of the instruction contains one of the +/// directly supported floating point condition codes. +fn supported_floatccs_predicate( + supported_cc: &[Literal], + format: &InstructionFormat, +) -> InstructionPredicate { + supported_cc + .iter() + .fold(InstructionPredicate::new(), |pred, literal| { + pred.or(InstructionPredicate::new_is_field_equal( + format, + "cond", + literal.to_rust_code(), + )) + }) +} + +/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte. +fn valid_scale(format: &InstructionFormat) -> InstructionPredicate { + ["1", "2", "4", "8"] + .iter() + .fold(InstructionPredicate::new(), |pred, &literal| { + pred.or(InstructionPredicate::new_is_field_equal( + format, + "imm", + literal.into(), + )) + }) +} + +pub fn define<'shared>( + shared_defs: &'shared SharedDefinitions, + settings: &'shared SettingGroup, + regs: &'shared IsaRegs, +) -> RecipeGroup<'shared> { + // The set of floating point condition codes that are directly supported. + // Other condition codes need to be reversed or expressed as two tests. 
+ let floatcc = shared_defs.operand_kinds.by_name("floatcc"); + let supported_floatccs: Vec = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"] + .iter() + .map(|name| Literal::enumerator_for(floatcc, name)) + .collect(); + + let formats = &shared_defs.format_registry; + + // Register classes shorthands. + let abcd = regs.class_by_name("ABCD"); + let gpr = regs.class_by_name("GPR"); + let fpr = regs.class_by_name("FPR"); + let flag = regs.class_by_name("FLAG"); + + // Operand constraints shorthands. + let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags")); + let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax")); + let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx")); + let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx")); + + // Stack operand with a 32-bit signed displacement from either RBP or RSP. + let stack_gpr32 = Stack::new(gpr); + let stack_fpr32 = Stack::new(fpr); + + // Format shorthands, prefixed with f_. + let f_binary = formats.by_name("Binary"); + let f_binary_imm = formats.by_name("BinaryImm"); + let f_branch = formats.by_name("Branch"); + let f_branch_float = formats.by_name("BranchFloat"); + let f_branch_int = formats.by_name("BranchInt"); + let f_branch_table_entry = formats.by_name("BranchTableEntry"); + let f_branch_table_base = formats.by_name("BranchTableBase"); + let f_call = formats.by_name("Call"); + let f_call_indirect = formats.by_name("CallIndirect"); + let f_copy_special = formats.by_name("CopySpecial"); + let f_float_compare = formats.by_name("FloatCompare"); + let f_float_cond = formats.by_name("FloatCond"); + let f_float_cond_trap = formats.by_name("FloatCondTrap"); + let f_func_addr = formats.by_name("FuncAddr"); + let f_indirect_jump = formats.by_name("IndirectJump"); + let f_int_compare = formats.by_name("IntCompare"); + let f_int_compare_imm = formats.by_name("IntCompareImm"); + let f_int_cond = formats.by_name("IntCond"); + let f_int_cond_trap = 
formats.by_name("IntCondTrap"); + let f_int_select = formats.by_name("IntSelect"); + let f_jump = formats.by_name("Jump"); + let f_load = formats.by_name("Load"); + let f_load_complex = formats.by_name("LoadComplex"); + let f_multiary = formats.by_name("MultiAry"); + let f_nullary = formats.by_name("NullAry"); + let f_reg_fill = formats.by_name("RegFill"); + let f_reg_move = formats.by_name("RegMove"); + let f_reg_spill = formats.by_name("RegSpill"); + let f_stack_load = formats.by_name("StackLoad"); + let f_store = formats.by_name("Store"); + let f_store_complex = formats.by_name("StoreComplex"); + let f_ternary = formats.by_name("Ternary"); + let f_trap = formats.by_name("Trap"); + let f_unary = formats.by_name("Unary"); + let f_unary_bool = formats.by_name("UnaryBool"); + let f_unary_global_value = formats.by_name("UnaryGlobalValue"); + let f_unary_ieee32 = formats.by_name("UnaryIeee32"); + let f_unary_ieee64 = formats.by_name("UnaryIeee64"); + let f_unary_imm = formats.by_name("UnaryImm"); + + // Predicates shorthands. + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Definitions. + let mut recipes = RecipeGroup::new(formats, regs); + + // A null unary instruction that takes a GPR register. Can be used for identity copies and + // no-op conversions. + recipes.add_recipe( + EncodingRecipeBuilder::new("null", f_unary, 0) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("stacknull", f_unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![stack_gpr32]) + .emit(""), + ); + + recipes + .add_recipe(EncodingRecipeBuilder::new("debugtrap", f_nullary, 1).emit("sink.put1(0xcc);")); + + // XX opcode, no ModR/M. + recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", f_trap, 0).emit( + r#" + sink.trap(code, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + "#, + )); + + // Macro: conditional jump over a ud2. 
+ recipes.add_recipe( + EncodingRecipeBuilder::new("trapif", f_int_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (icc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("trapff", f_float_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + formats.get(f_float_cond_trap), + )) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + // XX /r + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rr", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // XX /r with operands swapped. (RM form). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rrx", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + // XX /r with FPR ins and outs. A form. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fa", f_binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + // XX /r with FPR ins and outs. A form with input operands swapped. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fax", f_binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![1]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // XX /n for a unary operation with extension bits. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ur", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + // XX /r, but for a unary operator with separate input/output register, like + // copies. MR form, preserving flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("umr", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + ); + + // Same as umr, but with FPR -> GPR registers. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfumr", f_unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + ); + + // XX /r, but for a unary operator with separate input/output register. + // RM form. Clobbers FLAGS. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm, but doesn't clobber FLAGS. + let urm_noflags = recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm_noflags", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. 
Same as urm_noflags, but input limited to ABCD. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("urm_noflags_abcd", f_unary, 1) + .operands_in(vec![abcd]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + formats, + regs, + ) + .when_prefixed(urm_noflags), + ); + + // XX /r, RM form, FPR -> FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furm", f_unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RM form, GPR -> FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("frurm", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RM form, FPR -> GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfurm", f_unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RMI form for one of the roundXX SSE 4.1 instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furmi_rnd", f_unary, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .isa_predicate(use_sse41) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + sink.put1(match opcode { + Opcode::Nearest => 0b00, + Opcode::Floor => 0b01, + Opcode::Ceil => 0b10, + Opcode::Trunc => 0b11, + x => panic!("{} unexpected for furmi_rnd", opcode), + }); + "#, + ), + ); + + // XX /r, for regmove instructions. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("rmov", f_reg_move, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + // XX /r, for regmove instructions (FPR version, RM encoded). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("frmov", f_reg_move, 1) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, dst), sink); + modrm_rr(src, dst, sink); + "#, + ), + ); + + // XX /n with one arg in %rcx, for shifts. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rc", f_binary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rcx), + ]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("div", f_ternary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![reg_rax, reg_rdx]) + .emit( + r#" + sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg2), sink); + modrm_r_bits(in_reg2, bits, sink); + "#, + ), + ); + + // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) + recipes.add_template_recipe( + EncodingRecipeBuilder::new("mulx", f_binary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg1), sink); + modrm_r_bits(in_reg1, bits, sink); + "#, + ), + ); + + // XX /n ib with 8-bit immediate sign-extended. 
+ { + let format = formats.get(f_binary_imm); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_ib", f_binary_imm, 2) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int(format, "imm", 8, 0)) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + // XX /n id with 32-bit immediate sign-extended. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_id", f_binary_imm, 5) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + { + // XX /n id with 32-bit immediate sign-extended. UnaryImm version. + let format = formats.get(f_unary_imm); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("u_id", f_unary_imm, 5) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX+rd id unary with 32-bit immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id", f_unary_imm, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + + // XX+rd id unary with bool immediate. Note no recipe predicate. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id_bool", f_unary_bool, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: u32 = if imm { 1 } else { 0 }; + sink.put4(imm); + "#, + ), + ); + + // XX+rd iq unary with 64-bit immediate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_iq", f_unary_imm, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put8(imm as u64); + "#, + ), + ); + + // XX /n Unary with floating point 32-bit immediate equal to zero. + { + let format = formats.get(f_unary_ieee32); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f32imm_z", f_unary_ieee32, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_32bit_float(format, "imm")) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + // XX /n Unary with floating point 64-bit immediate equal to zero. 
+ { + let format = formats.get(f_unary_ieee64); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f64imm_z", f_unary_ieee64, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_64bit_float(format, "imm")) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pushq", f_unary, 0) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("popq", f_nullary, 0) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("copysp", f_copy_special, 1) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp", f_unary, 1) + .operands_in(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink); + modrm_rr(RU::rsp.into(), in_reg0, sink); + "#, + ), + ); + + { + let format = formats.get(f_unary_imm); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_ib", f_unary_imm, 2) + .inst_predicate(InstructionPredicate::new_is_signed_int(format, "imm", 8, 0)) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_id", f_unary_imm, 5) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, 
sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX+rd id with Abs4 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr4", f_func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr8", f_func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put8(0); + "#, + ), + ); + + // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr4", f_func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put4(!0); + "#, + ), + ); + + // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr8", f_func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put8(!0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_fnaddr8", f_func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. 
+ .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86PCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_fnaddr8", f_func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86GOTPCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd id with Abs4 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr4", f_unary_global_value, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.global_values[global_value].symbol_name(), + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr8", f_unary_global_value, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.global_values[global_value].symbol_name(), + 0); + sink.put8(0); + "#, + ), + ); + + // XX+rd iq with PCRel4 globalsym relocation. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_gvaddr8", f_unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86PCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_gvaddr8", f_unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86GOTPCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // Stack addresses. + // + // TODO Alternative forms for 8-bit immediates, when applicable. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spaddr4_id", f_stack_load, 6) + .operands_out(vec![gpr]) + .emit( + r#" + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + {{PUT_OP}}(bits, rex2(out_reg0, base), sink); + modrm_sib_disp8(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spaddr8_id", f_stack_load, 6) + .operands_out(vec![gpr]) + .emit( + r#" + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + "#, + ), + ); + + // Store recipes. 
+ + { + // Simple stores. + let format = formats.get(f_store); + + // A predicate asking if the offset is zero. + let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into()); + + // XX /r register-indirect store with no offset. + let st = recipes.add_template_recipe( + EncodingRecipeBuilder::new("st", f_store, 1) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("st_abcd", f_store, 1) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + formats, + regs, + ) + .when_prefixed(st), + ); + + // XX /r register-indirect store of FPR with no offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fst", f_store, 1) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + ); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0); + + // XX /r register-indirect store with 8-bit offset. + let st_disp8 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp8", f_store, 2) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp8_abcd", f_store, 2) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + formats, + regs, + ) + .when_prefixed(st_disp8), + ); + + // XX /r register-indirect store with 8-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstDisp8", f_store, 2) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. 
+ let st_disp32 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp32", f_store, 5) + .operands_in(vec![gpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp32_abcd", f_store, 5) + .operands_in(vec![abcd, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + formats, + regs, + ) + .when_prefixed(st_disp32), + ); + + // XX /r register-indirect store with 32-bit offset of FPR. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstDisp32", f_store, 5) + .operands_in(vec![fpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + { + // Complex stores. + let format = formats.get(f_store_complex); + + // A predicate asking if the offset is zero. + let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into()); + + // XX /r register-indirect store with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex", f_store_complex, 2) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex_abcd", f_store_complex, 2) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndex", f_store_complex, 2) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0); + + // XX /r register-indirect store with index and 8-bit offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8", f_store_complex, 3) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", f_store_complex, 3) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp8", f_store_complex, 3) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = InstructionPredicate::new_is_signed_int(format, "offset", 32, 0); + + // XX /r register-indirect store with index and 32-bit offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32", f_store_complex, 6) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", f_store_complex, 6) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp32", f_store_complex, 6) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary spill with SIB and 32-bit displacement. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("spillSib32", f_unary, 6) + .operands_in(vec![gpr]) + .operands_out(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Like spillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fspillSib32", f_unary, 6) + .operands_in(vec![fpr]) + .operands_out(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Regspill using RSP-relative addressing. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("regspill32", f_reg_spill, 6) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Like regspill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregspill32", f_reg_spill, 6) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Load recipes. 
    {
        // Simple loads.
        let format = formats.get(f_load);

        // A predicate asking if the offset is zero.
        let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into());

        // XX /r load with no offset.
        // Even with a zero displacement, some base registers still force a SIB
        // byte or a zero disp8 byte; the compute_size callback accounts for that.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ld", f_load, 1)
                .operands_in(vec![gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else if needs_offset(in_reg0) {
                        modrm_disp8(in_reg0, out_reg0, sink);
                        sink.put1(0);
                    } else {
                        modrm_rm(in_reg0, out_reg0, sink);
                    }
                "#,
                ),
        );

        // XX /r float load with no offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fld", f_load, 1)
                .operands_in(vec![gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else if needs_offset(in_reg0) {
                        modrm_disp8(in_reg0, out_reg0, sink);
                        sink.put1(0);
                    } else {
                        modrm_rm(in_reg0, out_reg0, sink);
                    }
                "#,
                ),
        );

        let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0);

        // XX /r load with 8-bit offset.
        // XX /r load with a signed 8-bit displacement.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldDisp8", f_load, 2)
                .operands_in(vec![gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else {
                        modrm_disp8(in_reg0, out_reg0, sink);
                    }
                    let offset: i32 = offset.into();
                    sink.put1(offset as u8);
                "#,
                ),
        );

        // XX /r float load with 8-bit offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldDisp8", f_load, 2)
                .operands_in(vec![gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else {
                        modrm_disp8(in_reg0, out_reg0, sink);
                    }
                    let offset: i32 = offset.into();
                    sink.put1(offset as u8);
                "#,
                ),
        );

        let has_big_offset = InstructionPredicate::new_is_signed_int(format, "offset", 32, 0);

        // XX /r load with 32-bit offset.
        // XX /r load with a signed 32-bit displacement.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldDisp32", f_load, 5)
                .operands_in(vec![gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib_disp32(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else {
                        modrm_disp32(in_reg0, out_reg0, sink);
                    }
                    let offset: i32 = offset.into();
                    sink.put4(offset as u32);
                "#,
                ),
        );

        // XX /r float load with 32-bit offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldDisp32", f_load, 5)
                .operands_in(vec![gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib_disp32(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else {
                        modrm_disp32(in_reg0, out_reg0, sink);
                    }
                    let offset: i32 = offset.into();
                    sink.put4(offset as u32);
                "#,
                ),
        );
    } // End of the simple-loads scope.

    {
        // Complex loads (base + index addressing).
        let format = formats.get(f_load_complex);

        // A predicate asking if the offset is zero.
        let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into());

        // XX /r load with index and no offset.
        // XX /r load with index and no displacement.
        // in_reg0 = base, in_reg1 = index (scale factor 0, i.e. *1).
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldWithIndex", f_load_complex, 2)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_offset_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    // The else branch always inserts an SIB byte.
                    if needs_offset(in_reg0) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                        sink.put1(0);
                    } else {
                        modrm_sib(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                    }
                "#,
                ),
        );

        // XX /r float load with index and no offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldWithIndex", f_load_complex, 2)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_offset_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    // The else branch always inserts an SIB byte.
                    if needs_offset(in_reg0) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                        sink.put1(0);
                    } else {
                        modrm_sib(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                    }
                "#,
                ),
        );

        let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0);

        // XX /r load with index and 8-bit offset.
        // XX /r load with index and a signed 8-bit displacement.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldWithIndexDisp8", f_load_complex, 3)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    modrm_sib_disp8(out_reg0, sink);
                    sib(0, in_reg1, in_reg0, sink);
                    let offset: i32 = offset.into();
                    sink.put1(offset as u8);
                "#,
                ),
        );

        // XX /r float load with index and 8-bit offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldWithIndexDisp8", f_load_complex, 3)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    modrm_sib_disp8(out_reg0, sink);
                    sib(0, in_reg1, in_reg0, sink);
                    let offset: i32 = offset.into();
                    sink.put1(offset as u8);
                "#,
                ),
        );

        let has_big_offset = InstructionPredicate::new_is_signed_int(format, "offset", 32, 0);

        // XX /r load with index and 32-bit offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldWithIndexDisp32", f_load_complex, 6)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib(0, in_reg1, in_reg0, sink);
                    let offset: i32 = offset.into();
                    sink.put4(offset as u32);
                "#,
                ),
        );

        // XX /r float load with index and 32-bit offset.
        // XX /r float load with index and a signed 32-bit displacement.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldWithIndexDisp32", f_load_complex, 6)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib(0, in_reg1, in_reg0, sink);
                    let offset: i32 = offset.into();
                    sink.put4(offset as u32);
                "#,
                ),
        );
    } // End of the complex-loads scope.

    // Unary fill with SIB and 32-bit displacement.
    // Counterpart of spillSib32: reloads a GPR from a stack slot.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fillSib32", f_unary, 6)
            .operands_in(vec![stack_gpr32])
            .operands_out(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    let base = stk_base(in_stk0.base);
                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib_noindex(base, sink);
                    sink.put4(in_stk0.offset as u32);
                "#,
            ),
    );

    // Like fillSib32, but targeting an FPR rather than a GPR.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("ffillSib32", f_unary, 6)
            .operands_in(vec![stack_fpr32])
            .operands_out(vec![fpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    let base = stk_base(in_stk0.base);
                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib_noindex(base, sink);
                    sink.put4(in_stk0.offset as u32);
                "#,
            ),
    );

    // Regfill with RSP-relative 32-bit displacement.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("regfill32", f_reg_fill, 6)
            .operands_in(vec![stack_gpr32])
            .clobbers_flags(false)
            .emit(
                r#"
                    let src = StackRef::sp(src, &func.stack_slots);
                    let base = stk_base(src.base);
                    {{PUT_OP}}(bits, rex2(base, dst), sink);
                    modrm_sib_disp32(dst, sink);
                    sib_noindex(base, sink);
                    sink.put4(src.offset as u32);
                "#,
            ),
    );

    // Like regfill32, but targeting an FPR rather than a GPR.
    // Regfill of an FPR with RSP-relative 32-bit displacement.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fregfill32", f_reg_fill, 6)
            .operands_in(vec![stack_fpr32])
            .clobbers_flags(false)
            .emit(
                r#"
                    let src = StackRef::sp(src, &func.stack_slots);
                    let base = stk_base(src.base);
                    {{PUT_OP}}(bits, rex2(base, dst), sink);
                    modrm_sib_disp32(dst, sink);
                    sib_noindex(base, sink);
                    sink.put4(src.offset as u32);
                "#,
            ),
    );

    // Call/return.

    // Direct call with a 4-byte PC-relative relocation filled in by the linker.
    recipes.add_template_recipe(EncodingRecipeBuilder::new("call_id", f_call, 4).emit(
        r#"
            sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
            {{PUT_OP}}(bits, BASE_REX, sink);
            // The addend adjusts for the difference between the end of the
            // instruction and the beginning of the immediate field.
            sink.reloc_external(Reloc::X86CallPCRel4,
                                &func.dfg.ext_funcs[func_ref].name,
                                -4);
            sink.put4(0);
        "#,
    ));

    // Direct call through the PLT (position-independent code).
    recipes.add_template_recipe(EncodingRecipeBuilder::new("call_plt_id", f_call, 4).emit(
        r#"
            sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
            {{PUT_OP}}(bits, BASE_REX, sink);
            sink.reloc_external(Reloc::X86CallPLTRel4,
                                &func.dfg.ext_funcs[func_ref].name,
                                -4);
            sink.put4(0);
        "#,
    ));

    // Indirect call through a register.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("call_r", f_call_indirect, 1)
            .operands_in(vec![gpr])
            .emit(
                r#"
                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("ret", f_multiary, 0).emit("{{PUT_OP}}(bits, BASE_REX, sink);"),
    );

    // Branches.
    // Unconditional jumps: 8-bit ("b") and 32-bit ("d") displacement forms.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jmpb", f_jump, 1)
            .branch_range((1, 8))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, BASE_REX, sink);
                    disp1(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jmpd", f_jump, 4)
            .branch_range((4, 32))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, BASE_REX, sink);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    // Conditional branches on integer condition codes; the condition is OR'ed
    // into the opcode via icc2opc.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brib", f_branch_int, 1)
            .operands_in(vec![reg_rflags])
            .branch_range((1, 8))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
                    disp1(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brid", f_branch_int, 4)
            .operands_in(vec![reg_rflags])
            .branch_range((4, 32))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    // Conditional branches on float condition codes; restricted to the
    // condition codes that map directly onto a single Jcc.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brfb", f_branch_float, 1)
            .operands_in(vec![reg_rflags])
            .branch_range((1, 8))
            .clobbers_flags(false)
            .inst_predicate(supported_floatccs_predicate(
                &supported_floatccs,
                formats.get(f_branch_float),
            ))
            .emit(
                r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
                    disp1(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brfd", f_branch_float, 4)
            .operands_in(vec![reg_rflags])
            .branch_range((4, 32))
            .clobbers_flags(false)
            .inst_predicate(supported_floatccs_predicate(
                &supported_floatccs,
                formats.get(f_branch_float),
            ))
            .emit(
                r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("indirect_jmp", f_indirect_jump, 1)
            .operands_in(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                "#,
            ),
    );

    // Jump-table entry load: base + scaled index, where the scale comes from
    // the immediate (must be a power of two, enforced by valid_scale).
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jt_entry", f_branch_table_entry, 2)
            .operands_in(vec![gpr, gpr])
            .operands_out(vec![gpr])
            .clobbers_flags(false)
            .inst_predicate(valid_scale(formats.get(f_branch_table_entry)))
            .compute_size("size_plus_maybe_offset_for_in_reg_1")
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink);
                    if needs_offset(in_reg1) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
                        sink.put1(0);
                    } else {
                        modrm_sib(out_reg0, sink);
                        sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
                    }
                "#,
            ),
    );

    // RIP-relative lea of the jump table base.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jt_base", f_branch_table_base, 5)
            .operands_out(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
                    modrm_riprel(out_reg0, sink);

                    // No reloc is needed here as the jump table is emitted directly after
                    // the function body.
                    jt_disp4(table, func, sink);
                "#,
            ),
    );

    // Test flags and set a register.
    //
    // These setCC instructions only set the low 8 bits, and they can only write ABCD registers
    // without a REX prefix.
    //
    // Other instruction encodings accepting `b1` inputs have the same constraints and only look at
    // the low 8 bits of the input register.
    // setCC on an integer condition. The REX-prefixed template can target any
    // GPR; the non-prefixed fallback below is limited to the ABCD registers.
    let seti = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("seti", f_int_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![gpr])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            formats,
            regs,
        )
        .requires_prefix(true),
    );

    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("seti_abcd", f_int_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![abcd])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            formats,
            regs,
        )
        .when_prefixed(seti),
    );

    // Same pair for float condition codes.
    let setf = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("setf", f_float_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![gpr])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            formats,
            regs,
        )
        .requires_prefix(true),
    );

    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("setf_abcd", f_float_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![abcd])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            formats,
            regs,
        )
        .when_prefixed(setf),
    );

    // Conditional move (a.k.a integer select)
    // (maybe-REX.W) 0F 4x modrm(r,r)
    // 1 byte, modrm(r,r), is after the opcode
    // The output is tied to the third input (operands_out(vec![2])), so the
    // false-case value flows through unchanged when the move is not taken.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("cmov", f_int_select, 1)
            .operands_in(vec![
                OperandConstraint::FixedReg(reg_rflags),
                OperandConstraint::RegClass(gpr),
                OperandConstraint::RegClass(gpr),
            ])
            .operands_out(vec![2])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
                    modrm_rr(in_reg1, in_reg2, sink);
                "#,
            ),
    );
// Bit scan forwards and reverse + recipes.add_template_recipe( + EncodingRecipeBuilder::new("bsf_and_bsr", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // Compare and set flags. + + // XX /r, MR form. Compare two GPR registers and set flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // Same as rcmp, but second operand is the stack pointer. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp_sp", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink); + modrm_rr(in_reg0, RU::rsp.into(), sink); + "#, + ), + ); + + // XX /r, RM form. Compare two FPR registers and set flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fcmp", f_binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + { + let format = formats.get(f_binary_imm); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "imm", 8, 0); + + // XX /n, MI form with imm8. 
        // Compare-with-immediate, imm8 form.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("rcmp_ib", f_binary_imm, 2)
                .operands_in(vec![gpr])
                .operands_out(vec![reg_rflags])
                .inst_predicate(has_small_offset)
                .emit(
                    r#"
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    let imm: i64 = imm.into();
                    sink.put1(imm as u8);
                "#,
                ),
        );

        let has_big_offset = InstructionPredicate::new_is_signed_int(format, "imm", 32, 0);

        // XX /n, MI form with imm32.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("rcmp_id", f_binary_imm, 5)
                .operands_in(vec![gpr])
                .operands_out(vec![reg_rflags])
                .inst_predicate(has_big_offset)
                .emit(
                    r#"
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    let imm: i64 = imm.into();
                    sink.put4(imm as u32);
                "#,
                ),
        );
    } // End of the immediate-compare scope.

    // Test-and-branch.
    //
    // This recipe represents the macro fusion of a test and a conditional branch.
    // This serves two purposes:
    //
    // 1. Guarantee that the test and branch get scheduled next to each other so
    //    macro fusion is guaranteed to be possible.
    // 2. Hide the status flags from Cranelift which doesn't currently model flags.
    //
    // The encoding bits affect both the test and the branch instruction:
    //
    // Bits 0-7 are the Jcc opcode.
    // Bits 8-15 control the test instruction which always has opcode byte 0x85.

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("tjccb", f_branch, 1 + 2)
            .operands_in(vec![gpr])
            .branch_range((3, 8))
            .emit(
                r#"
                    // test r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(bits as u8);
                    disp1(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("tjccd", f_branch, 1 + 6)
            .operands_in(vec![gpr])
            .branch_range((7, 32))
            .emit(
                r#"
                    // test r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(0x0f);
                    sink.put1(bits as u8);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    // 8-bit test-and-branch.

    let t8jccb = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("t8jccb", f_branch, 1 + 2)
                .operands_in(vec![gpr])
                .branch_range((3, 8))
                .emit(
                    r#"
                    // test8 r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(bits as u8);
                    disp1(destination, func, sink);
                "#,
                ),
            formats,
            regs,
        )
        .requires_prefix(true),
    );

    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("t8jccb_abcd", f_branch, 1 + 2)
                .operands_in(vec![abcd])
                .branch_range((3, 8))
                .emit(
                    r#"
                    // test8 r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(bits as u8);
                    disp1(destination, func, sink);
                "#,
                ),
            formats,
            regs,
        )
        .when_prefixed(t8jccb),
    );

    let t8jccd = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("t8jccd", f_branch, 1 + 6)
                .operands_in(vec![gpr])
                .branch_range((7, 32))
                .emit(
                    r#"
                    // test8 r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(0x0f);
                    sink.put1(bits as u8);
                    disp4(destination, func, sink);
                "#,
                ),
            formats,
            regs,
        )
        .requires_prefix(true),
    );

    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("t8jccd_abcd", f_branch, 1 + 6)
                .operands_in(vec![abcd])
                .branch_range((7, 32))
                .emit(
                    r#"
                    // test8 r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(0x0f);
                    sink.put1(bits as u8);
                    disp4(destination, func, sink);
                "#,
                ),
            formats,
            regs,
        )
        .when_prefixed(t8jccd),
    );

    // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
    // The register allocator can't handle a branch instruction with constrained
    // operands like the t8jccd_abcd above. This variant can accept the b1 operand in
    // any register, but it is larger because it uses a 32-bit test instruction with
    // a 0xff immediate.

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("t8jccd_long", f_branch, 5 + 6)
            .operands_in(vec![gpr])
            .branch_range((11, 32))
            .emit(
                r#"
                    // test32 r, 0xff.
                    {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    sink.put4(0xff);
                    // Jcc instruction.
                    sink.put1(0x0f);
                    sink.put1(bits as u8);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    // Comparison that produces a `b1` result in a GPR.
    //
    // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
    //
    // TODO This is not a great solution because:
    //
    // - The cmp+setcc combination is not recognized by CPU's macro fusion.
    // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
    //   instructions may need a REX independently.
    // - Modeling CPU flags in the type system would be better.
    //
    // Since the `setCC` instructions only write an 8-bit register, we use that as
    // our `b1` representation: A `b1` value is represented as a GPR where the low 8
    // bits are known to be 0 or 1. The high bits are undefined.
    //
    // This bandaid macro doesn't support a REX prefix for the final `setCC`
    // instruction, so it is limited to the `ABCD` register class for booleans.
    // The omission of a `when_prefixed` alternative is deliberate here.
    // cmp + setCC macro producing a `b1` in an ABCD register.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("icscc", f_int_compare, 1 + 3)
            .operands_in(vec![gpr, gpr])
            .operands_out(vec![abcd])
            .emit(
                r#"
                    // Comparison instruction.
                    {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
                    modrm_rr(in_reg0, in_reg1, sink);
                    // `setCC` instruction, no REX.
                    use crate::ir::condcodes::IntCC::*;
                    let setcc = match cond {
                        Equal => 0x94,
                        NotEqual => 0x95,
                        SignedLessThan => 0x9c,
                        SignedGreaterThanOrEqual => 0x9d,
                        SignedGreaterThan => 0x9f,
                        SignedLessThanOrEqual => 0x9e,
                        UnsignedLessThan => 0x92,
                        UnsignedGreaterThanOrEqual => 0x93,
                        UnsignedGreaterThan => 0x97,
                        UnsignedLessThanOrEqual => 0x96,
                    };
                    sink.put1(0x0f);
                    sink.put1(setcc);
                    modrm_rr(out_reg0, 0, sink);
                "#,
            ),
    );

    {
        let format = formats.get(f_int_compare_imm);

        let is_small_imm = InstructionPredicate::new_is_signed_int(format, "imm", 8, 0);

        // Immediate variant, imm8 form.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("icscc_ib", f_int_compare_imm, 2 + 3)
                .operands_in(vec![gpr])
                .operands_out(vec![abcd])
                .inst_predicate(is_small_imm)
                .emit(
                    r#"
                    // Comparison instruction.
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    let imm: i64 = imm.into();
                    sink.put1(imm as u8);
                    // `setCC` instruction, no REX.
                    use crate::ir::condcodes::IntCC::*;
                    let setcc = match cond {
                        Equal => 0x94,
                        NotEqual => 0x95,
                        SignedLessThan => 0x9c,
                        SignedGreaterThanOrEqual => 0x9d,
                        SignedGreaterThan => 0x9f,
                        SignedLessThanOrEqual => 0x9e,
                        UnsignedLessThan => 0x92,
                        UnsignedGreaterThanOrEqual => 0x93,
                        UnsignedGreaterThan => 0x97,
                        UnsignedLessThanOrEqual => 0x96,
                    };
                    sink.put1(0x0f);
                    sink.put1(setcc);
                    modrm_rr(out_reg0, 0, sink);
                "#,
                ),
        );

        let is_big_imm = InstructionPredicate::new_is_signed_int(format, "imm", 32, 0);

        // Immediate variant, imm32 form.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("icscc_id", f_int_compare_imm, 5 + 3)
                .operands_in(vec![gpr])
                .operands_out(vec![abcd])
                .inst_predicate(is_big_imm)
                .emit(
                    r#"
                    // Comparison instruction.
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    let imm: i64 = imm.into();
                    sink.put4(imm as u32);
                    // `setCC` instruction, no REX.
                    use crate::ir::condcodes::IntCC::*;
                    let setcc = match cond {
                        Equal => 0x94,
                        NotEqual => 0x95,
                        SignedLessThan => 0x9c,
                        SignedGreaterThanOrEqual => 0x9d,
                        SignedGreaterThan => 0x9f,
                        SignedLessThanOrEqual => 0x9e,
                        UnsignedLessThan => 0x92,
                        UnsignedGreaterThanOrEqual => 0x93,
                        UnsignedGreaterThan => 0x97,
                        UnsignedLessThanOrEqual => 0x96,
                    };
                    sink.put1(0x0f);
                    sink.put1(setcc);
                    modrm_rr(out_reg0, 0, sink);
                "#,
                ),
        );
    } // End of the int-compare-imm scope.

    // Make a FloatCompare instruction predicate with the supported condition codes.
    //
    // Same thing for floating point.
    //
    // The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this:
    //
    //    ZPC OSA
    // UN 111 000
    // GT 000 000
    // LT 001 000
    // EQ 100 000
    //
    // Not all floating point condition codes are supported.
    // The omission of a `when_prefixed` alternative is deliberate here.
    // ucomis + setCC macro producing a `b1` in an ABCD register.
    // Condition codes that cannot be computed from a single setCC panic at
    // recipe-emission time; they are filtered out up front by the
    // supported_floatccs predicate.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fcscc", f_float_compare, 1 + 3)
            .operands_in(vec![fpr, fpr])
            .operands_out(vec![abcd])
            .inst_predicate(supported_floatccs_predicate(
                &supported_floatccs,
                formats.get(f_float_compare),
            ))
            .emit(
                r#"
                    // Comparison instruction.
                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                    modrm_rr(in_reg1, in_reg0, sink);
                    // `setCC` instruction, no REX.
                    use crate::ir::condcodes::FloatCC::*;
                    let setcc = match cond {
                        Ordered                    => 0x9b, // EQ|LT|GT => setnp (P=0)
                        Unordered                  => 0x9a, // UN       => setp  (P=1)
                        OrderedNotEqual            => 0x95, // LT|GT    => setne (Z=0),
                        UnorderedOrEqual           => 0x94, // UN|EQ    => sete  (Z=1)
                        GreaterThan                => 0x97, // GT       => seta  (C=0&Z=0)
                        GreaterThanOrEqual         => 0x93, // GT|EQ    => setae (C=0)
                        UnorderedOrLessThan        => 0x92, // UN|LT    => setb  (C=1)
                        UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1)
                        Equal |                       // EQ
                        NotEqual |                    // UN|LT|GT
                        LessThan |                    // LT
                        LessThanOrEqual |             // LT|EQ
                        UnorderedOrGreaterThan |      // UN|GT
                        UnorderedOrGreaterThanOrEqual // UN|GT|EQ
                        => panic!("{} not supported by fcscc", cond),
                    };
                    sink.put1(0x0f);
                    sink.put1(setcc);
                    modrm_rr(out_reg0, 0, sink);
                "#,
            ),
    );

    // Return the fully populated recipe group to the caller.
    recipes
}