diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 68cc77be0e..ad4934ee97 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -394,336 +394,41 @@ impl PerCpuModeEncodings { // Definitions. -#[allow(clippy::cognitive_complexity)] -pub(crate) fn define( - shared_defs: &SharedDefinitions, - settings: &SettingGroup, - x86: &InstructionGroup, - r: &RecipeGroup, -) -> PerCpuModeEncodings { +#[inline(never)] +fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { let shared = &shared_defs.instructions; let formats = &shared_defs.formats; // Shorthands for instructions. - let adjust_sp_down = shared.by_name("adjust_sp_down"); - let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm"); - let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm"); - let band = shared.by_name("band"); - let band_imm = shared.by_name("band_imm"); - let band_not = shared.by_name("band_not"); let bconst = shared.by_name("bconst"); let bint = shared.by_name("bint"); - let bitcast = shared.by_name("bitcast"); - let bnot = shared.by_name("bnot"); - let bor = shared.by_name("bor"); - let bor_imm = shared.by_name("bor_imm"); - let brff = shared.by_name("brff"); - let brif = shared.by_name("brif"); - let brnz = shared.by_name("brnz"); - let brz = shared.by_name("brz"); - let bxor = shared.by_name("bxor"); - let bxor_imm = shared.by_name("bxor_imm"); - let call = shared.by_name("call"); - let call_indirect = shared.by_name("call_indirect"); - let ceil = shared.by_name("ceil"); - let clz = shared.by_name("clz"); let copy = shared.by_name("copy"); - let copy_nop = shared.by_name("copy_nop"); let copy_special = shared.by_name("copy_special"); let copy_to_ssa = shared.by_name("copy_to_ssa"); - let ctz = shared.by_name("ctz"); - let debugtrap = shared.by_name("debugtrap"); - let f32const = shared.by_name("f32const"); - let f64const = shared.by_name("f64const"); - let fadd = shared.by_name("fadd"); - let fcmp = shared.by_name("fcmp"); - let fcvt_from_sint = shared.by_name("fcvt_from_sint"); - let fdemote = shared.by_name("fdemote"); - let fdiv = shared.by_name("fdiv"); - let ffcmp = shared.by_name("ffcmp"); - let fill = shared.by_name("fill"); - let fill_nop = shared.by_name("fill_nop"); - let floor = shared.by_name("floor"); - let fmax = shared.by_name("fmax"); - let fmin = shared.by_name("fmin"); - let fmul = shared.by_name("fmul"); - let fpromote = shared.by_name("fpromote"); - let fsub = shared.by_name("fsub"); - let func_addr = shared.by_name("func_addr"); let get_pinned_reg = shared.by_name("get_pinned_reg"); - let iadd = shared.by_name("iadd"); - let iadd_ifcout = shared.by_name("iadd_ifcout"); - let iadd_ifcin = shared.by_name("iadd_ifcin"); - let iadd_ifcarry = shared.by_name("iadd_ifcarry"); - let iadd_imm = shared.by_name("iadd_imm"); - let icmp = shared.by_name("icmp"); - let icmp_imm = shared.by_name("icmp_imm"); let iconst = shared.by_name("iconst"); - let ifcmp = shared.by_name("ifcmp"); - let ifcmp_imm = shared.by_name("ifcmp_imm"); - let ifcmp_sp = shared.by_name("ifcmp_sp"); - let imul = shared.by_name("imul"); - let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); let ireduce = shared.by_name("ireduce"); - let ishl = shared.by_name("ishl"); - let ishl_imm = shared.by_name("ishl_imm"); - let is_null = shared.by_name("is_null"); - let istore16 = shared.by_name("istore16"); - let istore16_complex = 
shared.by_name("istore16_complex"); - let istore32 = shared.by_name("istore32"); - let istore32_complex = shared.by_name("istore32_complex"); - let istore8 = shared.by_name("istore8"); - let istore8_complex = shared.by_name("istore8_complex"); - let isub = shared.by_name("isub"); - let isub_ifbout = shared.by_name("isub_ifbout"); - let isub_ifbin = shared.by_name("isub_ifbin"); - let isub_ifborrow = shared.by_name("isub_ifborrow"); - let jump = shared.by_name("jump"); - let jump_table_base = shared.by_name("jump_table_base"); - let jump_table_entry = shared.by_name("jump_table_entry"); - let load = shared.by_name("load"); - let load_complex = shared.by_name("load_complex"); - let nearest = shared.by_name("nearest"); - let null = shared.by_name("null"); - let popcnt = shared.by_name("popcnt"); - let raw_bitcast = shared.by_name("raw_bitcast"); - let regfill = shared.by_name("regfill"); let regmove = shared.by_name("regmove"); - let regspill = shared.by_name("regspill"); - let return_ = shared.by_name("return"); - let rotl = shared.by_name("rotl"); - let rotl_imm = shared.by_name("rotl_imm"); - let rotr = shared.by_name("rotr"); - let rotr_imm = shared.by_name("rotr_imm"); - let sadd_sat = shared.by_name("sadd_sat"); - let safepoint = shared.by_name("safepoint"); - let scalar_to_vector = shared.by_name("scalar_to_vector"); - let selectif = shared.by_name("selectif"); let sextend = shared.by_name("sextend"); let set_pinned_reg = shared.by_name("set_pinned_reg"); - let sload16 = shared.by_name("sload16"); - let sload16_complex = shared.by_name("sload16_complex"); - let sload32 = shared.by_name("sload32"); - let sload32_complex = shared.by_name("sload32_complex"); - let sload8 = shared.by_name("sload8"); - let sload8_complex = shared.by_name("sload8_complex"); - let spill = shared.by_name("spill"); - let sqrt = shared.by_name("sqrt"); - let sshr = shared.by_name("sshr"); - let sshr_imm = shared.by_name("sshr_imm"); - let ssub_sat = shared.by_name("ssub_sat"); - let stack_addr = shared.by_name("stack_addr"); - let store = shared.by_name("store"); - let store_complex = shared.by_name("store_complex"); - let symbol_value = shared.by_name("symbol_value"); - let trap = shared.by_name("trap"); - let trapff = shared.by_name("trapff"); - let trapif = shared.by_name("trapif"); - let resumable_trap = shared.by_name("resumable_trap"); - let trueff = shared.by_name("trueff"); - let trueif = shared.by_name("trueif"); - let trunc = shared.by_name("trunc"); - let uadd_sat = shared.by_name("uadd_sat"); let uextend = shared.by_name("uextend"); - let uload16 = shared.by_name("uload16"); - let uload16_complex = shared.by_name("uload16_complex"); - let uload32 = shared.by_name("uload32"); - let uload32_complex = shared.by_name("uload32_complex"); - let uload8 = shared.by_name("uload8"); - let uload8_complex = shared.by_name("uload8_complex"); - let ushr = shared.by_name("ushr"); - let ushr_imm = shared.by_name("ushr_imm"); - let usub_sat = shared.by_name("usub_sat"); - let vconst = shared.by_name("vconst"); - let x86_bsf = x86.by_name("x86_bsf"); - let x86_bsr = x86.by_name("x86_bsr"); - let x86_cvtt2si = x86.by_name("x86_cvtt2si"); - let x86_fmax = x86.by_name("x86_fmax"); - let x86_fmin = x86.by_name("x86_fmin"); - let x86_insertps = x86.by_name("x86_insertps"); - let x86_movlhps = x86.by_name("x86_movlhps"); - let x86_movsd = x86.by_name("x86_movsd"); - let x86_pop = x86.by_name("x86_pop"); - let x86_pextr = x86.by_name("x86_pextr"); - let x86_pinsr = x86.by_name("x86_pinsr"); - let x86_pmaxs = 
x86.by_name("x86_pmaxs"); - let x86_pmaxu = x86.by_name("x86_pmaxu"); - let x86_pmins = x86.by_name("x86_pmins"); - let x86_pminu = x86.by_name("x86_pminu"); - let x86_pshufd = x86.by_name("x86_pshufd"); - let x86_pshufb = x86.by_name("x86_pshufb"); - let x86_psll = x86.by_name("x86_psll"); - let x86_psra = x86.by_name("x86_psra"); - let x86_psrl = x86.by_name("x86_psrl"); - let x86_ptest = x86.by_name("x86_ptest"); - let x86_push = x86.by_name("x86_push"); - let x86_sdivmodx = x86.by_name("x86_sdivmodx"); - let x86_smulx = x86.by_name("x86_smulx"); - let x86_udivmodx = x86.by_name("x86_udivmodx"); - let x86_umulx = x86.by_name("x86_umulx"); // Shorthands for recipes. - let rec_adjustsp = r.template("adjustsp"); - let rec_adjustsp_ib = r.template("adjustsp_ib"); - let rec_adjustsp_id = r.template("adjustsp_id"); - let rec_allones_fnaddr4 = r.template("allones_fnaddr4"); - let rec_allones_fnaddr8 = r.template("allones_fnaddr8"); - let rec_brfb = r.template("brfb"); - let rec_brfd = r.template("brfd"); - let rec_brib = r.template("brib"); - let rec_brid = r.template("brid"); - let rec_bsf_and_bsr = r.template("bsf_and_bsr"); - let rec_call_id = r.template("call_id"); - let rec_call_plt_id = r.template("call_plt_id"); - let rec_call_r = r.template("call_r"); - let rec_cmov = r.template("cmov"); let rec_copysp = r.template("copysp"); - let rec_div = r.template("div"); - let rec_debugtrap = r.recipe("debugtrap"); - let rec_f_ib = r.template("f_ib"); - let rec_f32imm_z = r.template("f32imm_z"); - let rec_f64imm_z = r.template("f64imm_z"); - let rec_fa = r.template("fa"); - let rec_fax = r.template("fax"); - let rec_fa_ib = r.template("fa_ib"); - let rec_fcmp = r.template("fcmp"); - let rec_fcscc = r.template("fcscc"); - let rec_ffillnull = r.recipe("ffillnull"); - let rec_ffillSib32 = r.template("ffillSib32"); - let rec_fillnull = r.recipe("fillnull"); - let rec_fillSib32 = r.template("fillSib32"); - let rec_fld = r.template("fld"); - let rec_fldDisp32 = r.template("fldDisp32"); - let rec_fldDisp8 = r.template("fldDisp8"); - let rec_fldWithIndex = r.template("fldWithIndex"); - let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); - let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); - let rec_fnaddr4 = r.template("fnaddr4"); - let rec_fnaddr8 = r.template("fnaddr8"); - let rec_fregfill32 = r.template("fregfill32"); - let rec_fregspill32 = r.template("fregspill32"); - let rec_frmov = r.template("frmov"); - let rec_frurm = r.template("frurm"); - let rec_fspillSib32 = r.template("fspillSib32"); - let rec_fst = r.template("fst"); - let rec_fstDisp32 = r.template("fstDisp32"); - let rec_fstDisp8 = r.template("fstDisp8"); - let rec_fstWithIndex = r.template("fstWithIndex"); - let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); - let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); - let rec_furm = r.template("furm"); let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); - let rec_furmi_rnd = r.template("furmi_rnd"); let rec_get_pinned_reg = r.recipe("get_pinned_reg"); - let rec_got_fnaddr8 = r.template("got_fnaddr8"); - let rec_got_gvaddr8 = r.template("got_gvaddr8"); - let rec_gvaddr4 = r.template("gvaddr4"); - let rec_gvaddr8 = r.template("gvaddr8"); - let rec_icscc = r.template("icscc"); - let rec_icscc_fpr = r.template("icscc_fpr"); - let rec_icscc_ib = r.template("icscc_ib"); - let rec_icscc_id = r.template("icscc_id"); - let rec_indirect_jmp = r.template("indirect_jmp"); - let rec_is_zero = r.template("is_zero"); - let rec_jmpb = r.template("jmpb"); - let 
rec_jmpd = r.template("jmpd"); - let rec_jt_base = r.template("jt_base"); - let rec_jt_entry = r.template("jt_entry"); - let rec_ld = r.template("ld"); - let rec_ldDisp32 = r.template("ldDisp32"); - let rec_ldDisp8 = r.template("ldDisp8"); - let rec_ldWithIndex = r.template("ldWithIndex"); - let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); - let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); - let rec_mulx = r.template("mulx"); let rec_null = r.recipe("null"); - let rec_null_fpr = r.recipe("null_fpr"); - let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8"); - let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8"); - let rec_pfcmp = r.template("pfcmp"); - let rec_popq = r.template("popq"); let rec_pu_id = r.template("pu_id"); let rec_pu_id_bool = r.template("pu_id_bool"); - let rec_pu_id_ref = r.template("pu_id_ref"); let rec_pu_iq = r.template("pu_iq"); - let rec_pushq = r.template("pushq"); - let rec_ret = r.template("ret"); - let rec_r_ib = r.template("r_ib"); - let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr"); - let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr"); - let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r"); - let rec_r_id = r.template("r_id"); - let rec_rcmp = r.template("rcmp"); - let rec_rcmp_ib = r.template("rcmp_ib"); - let rec_rcmp_id = r.template("rcmp_id"); - let rec_rcmp_sp = r.template("rcmp_sp"); - let rec_regfill32 = r.template("regfill32"); - let rec_regspill32 = r.template("regspill32"); - let rec_rc = r.template("rc"); - let rec_rfumr = r.template("rfumr"); - let rec_rfurm = r.template("rfurm"); let rec_rmov = r.template("rmov"); - let rec_rr = r.template("rr"); - let rec_rout = r.template("rout"); - let rec_rin = r.template("rin"); - let rec_rio = r.template("rio"); - let rec_rrx = r.template("rrx"); - let rec_safepoint = r.recipe("safepoint"); - let rec_setf_abcd = r.template("setf_abcd"); - let rec_seti_abcd = r.template("seti_abcd"); let rec_set_pinned_reg = r.template("set_pinned_reg"); - let rec_spaddr4_id = r.template("spaddr4_id"); - let rec_spaddr8_id = r.template("spaddr8_id"); - let rec_spillSib32 = r.template("spillSib32"); - let rec_st = r.template("st"); - let rec_stacknull = r.recipe("stacknull"); - let rec_stDisp32 = r.template("stDisp32"); - let rec_stDisp32_abcd = r.template("stDisp32_abcd"); - let rec_stDisp8 = r.template("stDisp8"); - let rec_stDisp8_abcd = r.template("stDisp8_abcd"); - let rec_stWithIndex = r.template("stWithIndex"); - let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); - let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); - let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); - let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); - let rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); - let rec_st_abcd = r.template("st_abcd"); - let rec_t8jccb_abcd = r.template("t8jccb_abcd"); - let rec_t8jccd_abcd = r.template("t8jccd_abcd"); - let rec_t8jccd_long = r.template("t8jccd_long"); - let rec_tjccb = r.template("tjccb"); - let rec_tjccd = r.template("tjccd"); - let rec_trap = r.template("trap"); - let rec_trapif = r.recipe("trapif"); - let rec_trapff = r.recipe("trapff"); let rec_u_id = r.template("u_id"); let rec_u_id_z = r.template("u_id_z"); let rec_umr = r.template("umr"); let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa"); - let rec_ur = r.template("ur"); - let rec_urm = r.template("urm"); let rec_urm_noflags = r.template("urm_noflags"); let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); - let rec_vconst = 
r.template("vconst"); - let rec_vconst_optimized = r.template("vconst_optimized"); - - // Predicates shorthands. - let all_ones_funcaddrs_and_not_is_pic = - settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); - let is_pic = settings.predicate_by_name("is_pic"); - let not_all_ones_funcaddrs_and_not_is_pic = - settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); - let not_is_pic = settings.predicate_by_name("not_is_pic"); - let use_popcnt = settings.predicate_by_name("use_popcnt"); - let use_lzcnt = settings.predicate_by_name("use_lzcnt"); - let use_bmi1 = settings.predicate_by_name("use_bmi1"); - let use_sse41 = settings.predicate_by_name("use_sse41"); - let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd"); - let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); - let use_sse42_simd = settings.predicate_by_name("use_sse42_simd"); - - // Definitions. - let mut e = PerCpuModeEncodings::new(); // The pinned reg is fixed to a certain value entirely user-controlled, so it generates nothing! e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0); @@ -732,41 +437,6 @@ pub(crate) fn define( rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(), ); - e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD)); - e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD)); - e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC)); - e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC)); - - e.enc_i32_i64(isub, rec_rr.opcodes(&SUB)); - e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB)); - e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB)); - e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB)); - - e.enc_i32_i64(band, rec_rr.opcodes(&AND)); - e.enc_b32_b64(band, rec_rr.opcodes(&AND)); - e.enc_i32_i64(bor, rec_rr.opcodes(&OR)); - e.enc_b32_b64(bor, rec_rr.opcodes(&OR)); - e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR)); - e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR)); - - // x86 has a bitwise not instruction NOT. - e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2)); - e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2)); - - // Also add a `b1` encodings for the logic instructions. - // TODO: Should this be done with 8-bit instructions? It would improve partial register - // dependencies. 
- e.enc_both(band.bind(B1), rec_rr.opcodes(&AND)); - e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR)); - e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR)); - - e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL)); - e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7)); - e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6)); - - e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5)); - e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4)); - e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE)); e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE)); e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE)); @@ -790,21 +460,6 @@ pub(crate) fn define( e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex()); e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w()); - e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0)); - e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0)); - - e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4)); - e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4)); - - e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1)); - e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1)); - - e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6)); - e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6)); - - // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can - // even use the single-byte immediate for 0xffff_ffXX masks. - // Immediate constants. e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); @@ -845,6 +500,7 @@ pub(crate) fn define( rec_u_id_z.opcodes(&XORB), is_zero_int.clone(), ); + // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that // encodings should be on 16-bit operands (f.ex, "xor %ax, %ax"). Cranelift currently does not // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these @@ -864,58 +520,252 @@ pub(crate) fn define( ); e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int); - // Shifts and rotates. - // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit - // and 16-bit shifts would need explicit masking. + // Numerical conversions. - for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] { - // Cannot use enc_i32_i64 for this pattern because instructions require - // to bind any. - e.enc32( - inst.bind(I32).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); - e.enc64( - inst.bind(I64).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(), - ); - e.enc64( - inst.bind(I32).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(), - ); - e.enc64( - inst.bind(I32).bind(Any), - rec_rc.opcodes(&ROTATE_CL).rrr(rrr), - ); + // Reducing an integer is a no-op. + e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + + e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); + + // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending + // instructions for %al/%ax/%eax to %ax/%eax/%rax. 
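Context for the no-op `ireduce` encodings above (an illustration added here, not part of the patch): the `rec_null` recipe emits zero bytes, which is sound because on x86 the narrow value already occupies the low bits of the same register. A minimal Rust sketch of that bit-level fact, using a hypothetical helper name:

    // Hypothetical illustration: reducing i64 -> i32 just reinterprets the
    // low 32 bits, so no machine instruction needs to be emitted.
    fn ireduce_i64_to_i32(x: u64) -> u32 {
        x as u32 // truncation keeps exactly the low bits
    }

    fn main() {
        assert_eq!(ireduce_i64_to_i32(0xdead_beef_1234_5678), 0x1234_5678);
    }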
+ + // movsbl + e.enc32( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), + ); + + // movswl + e.enc32( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD), + ); + + // movsbq + e.enc64( + sextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(), + ); + + // movswq + e.enc64( + sextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(), + ); + + // movslq + e.enc64( + sextend.bind(I64).bind(I32), + rec_urm_noflags.opcodes(&MOVSXD).rex().w(), + ); + + // movzbl + e.enc32( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + + // movzwl + e.enc32( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + + // movzbq, encoded as movzbl because it's equivalent and shorter. + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + + // movzwq, encoded as movzwl because it's equivalent and shorter + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + + // A 32-bit register copy clears the high 32 bits. + e.enc64( + uextend.bind(I64).bind(I32), + rec_umr.opcodes(&MOV_STORE).rex(), + ); + e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE)); + + // Convert bool to int. + // + // This assumes that b1 is represented as an 8-bit low register with the value 0 + // or 1. + // + // Encode movzbq as movzbl, because it's equivalent and shorter. + for &to in &[I8, I16, I32, I64] { + for &from in &[B1, B8] { + e.enc64( + bint.bind(to).bind(from), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + bint.bind(to).bind(from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + if to != I64 { + e.enc32( + bint.bind(to).bind(from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + } + } } - e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0)); - e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1)); - e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4)); - e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5)); - e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7)); + // Copy Special + // For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. + e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w()); + e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE)); - // Population count. 
- e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); - e.enc64_isap( - popcnt.bind(I64), - rec_urm.opcodes(&POPCNT).rex().w(), - use_popcnt, + // Copy to SSA. These have to be done with special _rex_only encoders, because the standard + // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account + // the source register, which is specified directly in the instruction. + e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only( + copy_to_ssa.bind(I16), + rec_umr_reg_to_ssa.opcodes(&MOV_STORE), ); - e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt); - e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); + e.enc_both_rex_only( + copy_to_ssa.bind(F64), + rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD), + ); + e.enc_both_rex_only( + copy_to_ssa.bind(F32), + rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD), + ); +} - // Count leading zero bits. - e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); - e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt); - e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt); - e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); +#[inline(never)] +fn define_memory( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; - // Count trailing zero bits. - e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); - e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1); - e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1); - e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); + // Shorthands for instructions. 
+ let adjust_sp_down = shared.by_name("adjust_sp_down"); + let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm"); + let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm"); + let copy_nop = shared.by_name("copy_nop"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let istore16 = shared.by_name("istore16"); + let istore16_complex = shared.by_name("istore16_complex"); + let istore32 = shared.by_name("istore32"); + let istore32_complex = shared.by_name("istore32_complex"); + let istore8 = shared.by_name("istore8"); + let istore8_complex = shared.by_name("istore8_complex"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let regfill = shared.by_name("regfill"); + let regspill = shared.by_name("regspill"); + let sload16 = shared.by_name("sload16"); + let sload16_complex = shared.by_name("sload16_complex"); + let sload32 = shared.by_name("sload32"); + let sload32_complex = shared.by_name("sload32_complex"); + let sload8 = shared.by_name("sload8"); + let sload8_complex = shared.by_name("sload8_complex"); + let spill = shared.by_name("spill"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + let uload16 = shared.by_name("uload16"); + let uload16_complex = shared.by_name("uload16_complex"); + let uload32 = shared.by_name("uload32"); + let uload32_complex = shared.by_name("uload32_complex"); + let uload8 = shared.by_name("uload8"); + let uload8_complex = shared.by_name("uload8_complex"); + let x86_pop = x86.by_name("x86_pop"); + let x86_push = x86.by_name("x86_push"); + + // Shorthands for recipes. + let rec_adjustsp = r.template("adjustsp"); + let rec_adjustsp_ib = r.template("adjustsp_ib"); + let rec_adjustsp_id = r.template("adjustsp_id"); + let rec_ffillnull = r.recipe("ffillnull"); + let rec_fillnull = r.recipe("fillnull"); + let rec_fillSib32 = r.template("fillSib32"); + let rec_ld = r.template("ld"); + let rec_ldDisp32 = r.template("ldDisp32"); + let rec_ldDisp8 = r.template("ldDisp8"); + let rec_ldWithIndex = r.template("ldWithIndex"); + let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); + let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); + let rec_popq = r.template("popq"); + let rec_pushq = r.template("pushq"); + let rec_regfill32 = r.template("regfill32"); + let rec_regspill32 = r.template("regspill32"); + let rec_spillSib32 = r.template("spillSib32"); + let rec_st = r.template("st"); + let rec_stacknull = r.recipe("stacknull"); + let rec_stDisp32 = r.template("stDisp32"); + let rec_stDisp32_abcd = r.template("stDisp32_abcd"); + let rec_stDisp8 = r.template("stDisp8"); + let rec_stDisp8_abcd = r.template("stDisp8_abcd"); + let rec_stWithIndex = r.template("stWithIndex"); + let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); + let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); + let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); + let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); + let rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); + let rec_st_abcd = r.template("st_abcd"); // Loads and stores. let is_load_complex_length_two = @@ -1080,32 +930,6 @@ pub(crate) fn define( e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG)); e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG)); - // Copy Special - // For x86-64, only define REX forms for now, since we can't describe the - // special regunit immediate operands with the current constraint language. 
- e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w()); - e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE)); - - // Copy to SSA. These have to be done with special _rex_only encoders, because the standard - // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account - // the source register, which is specified directly in the instruction. - e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); - e.enc_both_rex_only( - copy_to_ssa.bind(I16), - rec_umr_reg_to_ssa.opcodes(&MOV_STORE), - ); - e.enc_both_rex_only( - copy_to_ssa.bind(F64), - rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD), - ); - e.enc_both_rex_only( - copy_to_ssa.bind(F32), - rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD), - ); - // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn // into a no-op. // The same encoding is generated for both the 64- and 32-bit architectures. @@ -1151,6 +975,94 @@ pub(crate) fn define( adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(), ); +} + +#[inline(never)] +fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let bitcast = shared.by_name("bitcast"); + let copy = shared.by_name("copy"); + let regmove = shared.by_name("regmove"); + + // Shorthands for recipes. + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_furm = r.template("furm"); + let rec_rfumr = r.template("rfumr"); + + // Floating-point moves. + // movd + e.enc_both( + bitcast.bind(F32).bind(I32), + rec_frurm.opcodes(&MOVD_LOAD_XMM), + ); + e.enc_both( + bitcast.bind(I32).bind(F32), + rec_rfumr.opcodes(&MOVD_STORE_XMM), + ); + + // movq + e.enc64( + bitcast.bind(F64).bind(I64), + rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), + ); + e.enc64( + bitcast.bind(I64).bind(F64), + rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(), + ); + + // movaps + e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD)); + e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD)); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); +} + +#[inline(never)] +fn define_fpu_memory( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let fill = shared.by_name("fill"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let regfill = shared.by_name("regfill"); + let regspill = shared.by_name("regspill"); + let spill = shared.by_name("spill"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + + // Shorthands for recipes. 
+ let rec_ffillSib32 = r.template("ffillSib32"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fldWithIndex = r.template("fldWithIndex"); + let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); + let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_fstWithIndex = r.template("fstWithIndex"); + let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); + let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); // Float loads and stores. e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD)); @@ -1242,358 +1154,53 @@ pub(crate) fn define( e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE)); e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE)); e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE)); +} - // Function addresses. +#[inline(never)] +fn define_fpu_ops( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; - // Non-PIC, all-ones funcaddresses. - e.enc32_isap( - func_addr.bind(I32), - rec_fnaddr4.opcodes(&MOV_IMM), - not_all_ones_funcaddrs_and_not_is_pic, - ); - e.enc64_isap( - func_addr.bind(I64), - rec_fnaddr8.opcodes(&MOV_IMM).rex().w(), - not_all_ones_funcaddrs_and_not_is_pic, - ); + // Shorthands for instructions. + let ceil = shared.by_name("ceil"); + let f32const = shared.by_name("f32const"); + let f64const = shared.by_name("f64const"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fcvt_from_sint = shared.by_name("fcvt_from_sint"); + let fdemote = shared.by_name("fdemote"); + let fdiv = shared.by_name("fdiv"); + let ffcmp = shared.by_name("ffcmp"); + let floor = shared.by_name("floor"); + let fmul = shared.by_name("fmul"); + let fpromote = shared.by_name("fpromote"); + let fsub = shared.by_name("fsub"); + let nearest = shared.by_name("nearest"); + let sqrt = shared.by_name("sqrt"); + let trunc = shared.by_name("trunc"); + let x86_cvtt2si = x86.by_name("x86_cvtt2si"); + let x86_fmax = x86.by_name("x86_fmax"); + let x86_fmin = x86.by_name("x86_fmin"); - // Non-PIC, all-zeros funcaddresses. - e.enc32_isap( - func_addr.bind(I32), - rec_allones_fnaddr4.opcodes(&MOV_IMM), - all_ones_funcaddrs_and_not_is_pic, - ); - e.enc64_isap( - func_addr.bind(I64), - rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(), - all_ones_funcaddrs_and_not_is_pic, - ); + // Shorthands for recipes. + let rec_f32imm_z = r.template("f32imm_z"); + let rec_f64imm_z = r.template("f64imm_z"); + let rec_fa = r.template("fa"); + let rec_fcmp = r.template("fcmp"); + let rec_fcscc = r.template("fcscc"); + let rec_frurm = r.template("frurm"); + let rec_furm = r.template("furm"); + let rec_furmi_rnd = r.template("furmi_rnd"); + let rec_rfurm = r.template("rfurm"); - // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field. 
- let is_colocated_func = - InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref"); - e.enc64_instp( - func_addr.bind(I64), - rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(), - is_colocated_func, - ); - - // 64-bit, non-colocated, PIC. - e.enc64_isap( - func_addr.bind(I64), - rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(), - is_pic, - ); - - // Global addresses. - - // Non-PIC. - e.enc32_isap( - symbol_value.bind(I32), - rec_gvaddr4.opcodes(&MOV_IMM), - not_is_pic, - ); - e.enc64_isap( - symbol_value.bind(I64), - rec_gvaddr8.opcodes(&MOV_IMM).rex().w(), - not_is_pic, - ); - - // PIC, colocated. - e.enc64_func( - symbol_value.bind(I64), - rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(), - |encoding| { - encoding - .isa_predicate(is_pic) - .inst_predicate(InstructionPredicate::new_is_colocated_data(formats)) - }, - ); - - // PIC, non-colocated. - e.enc64_isap( - symbol_value.bind(I64), - rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(), - is_pic, - ); - - // Stack addresses. - // - // TODO: Add encoding rules for stack_load and stack_store, so that they - // don't get legalized to stack_addr + load/store. - e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA)); - e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w()); - - // Call/return - - // 32-bit, both PIC and non-PIC. - e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE)); - - // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. - let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref"); - e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func); - - // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC - // is currently using the large model, which requires calls be lowered to - // func_addr+call_indirect. - e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic); - - e.enc32( - call_indirect.bind(I32), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), - ); - e.enc64( - call_indirect.bind(I64), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(), - ); - e.enc64( - call_indirect.bind(I64), - rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), - ); - - e.enc32(return_, rec_ret.opcodes(&RET_NEAR)); - e.enc64(return_, rec_ret.opcodes(&RET_NEAR)); - - // Branches. - e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT)); - e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT)); - e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); - e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); - - e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW)); - e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW)); - - // Not all float condition codes are legal, see `supported_floatccs`. - e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW)); - e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW)); - - // Note that the tjccd opcode will be prefixed with 0x0f. - e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL)); - e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG)); - e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL)); - e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG)); - - // Branch on a b1 value in a register only looks at the low 8 bits. See also - // bint encodings below. - // - // Start with the worst-case encoding for X86_32 only. The register allocator - // can't handle a branch with an ABCD-constrained operand. 
- e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG)); - e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG)); - - e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL)); - e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG)); - e.enc_both( - brnz.bind(B1), - rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL), - ); - e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG)); - - // Jump tables. - e.enc64( - jump_table_entry.bind(I64), - rec_jt_entry.opcodes(&MOVSXD).rex().w(), - ); - e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD)); - - e.enc64( - jump_table_base.bind(I64), - rec_jt_base.opcodes(&LEA).rex().w(), - ); - e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA)); - - e.enc_x86_64( - indirect_jump_table_br.bind(I64), - rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), - ); - e.enc32( - indirect_jump_table_br.bind(I32), - rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), - ); - - // Trap as ud2 - e.enc32(trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc64(trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); - e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); - - // Debug trap as int3 - e.enc32_rec(debugtrap, rec_debugtrap, 0); - e.enc64_rec(debugtrap, rec_debugtrap, 0); - - e.enc32_rec(trapif, rec_trapif, 0); - e.enc64_rec(trapif, rec_trapif, 0); - e.enc32_rec(trapff, rec_trapff, 0); - e.enc64_rec(trapff, rec_trapff, 0); - - // Comparisons - e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG)); - e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7)); - e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7)); - e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG)); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7)); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7)); - // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). - - e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG)); - e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w()); - - // Convert flags to bool. - // This encodes `b1` as an 8-bit low register with the value 0 or 1. - e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); - e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); - - // Conditional move (a.k.a integer select). - e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW)); - - // Bit scan forwards and reverse - e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD)); - e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE)); - - // Convert bool to int. - // - // This assumes that b1 is represented as an 8-bit low register with the value 0 - // or 1. - // - // Encode movzbq as movzbl, because it's equivalent and shorter. - for &to in &[I8, I16, I32, I64] { - for &from in &[B1, B8] { - e.enc64( - bint.bind(to).bind(from), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - bint.bind(to).bind(from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - if to != I64 { - e.enc32( - bint.bind(to).bind(from), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - } - } - } - - // Numerical conversions. - - // Reducing an integer is a no-op. 
- e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); - e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); - e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); - - e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); - e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); - e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); - e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); - e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); - e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); - - // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending - // instructions for %al/%ax/%eax to %ax/%eax/%rax. - - // movsbl - e.enc32( - sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), - ); - e.enc64( - sextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(), - ); - e.enc64( - sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), - ); - - // movswl - e.enc32( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD), - ); - e.enc64( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD).rex(), - ); - e.enc64( - sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD), - ); - - // movsbq - e.enc64( - sextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(), - ); - - // movswq - e.enc64( - sextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(), - ); - - // movslq - e.enc64( - sextend.bind(I64).bind(I32), - rec_urm_noflags.opcodes(&MOVSXD).rex().w(), - ); - - // movzbl - e.enc32( - uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - e.enc64( - uextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - - // movzwl - e.enc32( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - e.enc64( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), - ); - e.enc64( - uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - - // movzbq, encoded as movzbl because it's equivalent and shorter. - e.enc64( - uextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), - ); - e.enc64( - uextend.bind(I64).bind(I8), - rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), - ); - - // movzwq, encoded as movzwl because it's equivalent and shorter - e.enc64( - uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), - ); - e.enc64( - uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(&MOVZX_WORD), - ); - - // A 32-bit register copy clears the high 32 bits. - e.enc64( - uextend.bind(I64).bind(I32), - rec_umr.opcodes(&MOV_STORE).rex(), - ); - e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE)); - - // Floating point + // Predicates shorthands. + let use_sse41 = settings.predicate_by_name("use_sse41"); // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for // 32-bit and 64-bit floats respectively. 
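Context for the zeroing idiom described above (an illustration, not part of the patch): an XOR of a register with itself can materialize a floating-point zero because +0.0 is the all-zeroes bit pattern, while -0.0 has the sign bit set and therefore cannot use the `f32imm_z`/`f64imm_z` recipes. A quick check of that assumption in plain Rust:

    fn main() {
        assert_eq!(0.0_f32.to_bits(), 0); // +0.0 is all zero bits
        assert_eq!(0.0_f64.to_bits(), 0);
        assert_ne!((-0.0_f64).to_bits(), 0); // sign bit set: needs a real constant load
    }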
@@ -1616,40 +1223,6 @@ pub(crate) fn define( e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float); e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float); - // movd - e.enc_both( - bitcast.bind(F32).bind(I32), - rec_frurm.opcodes(&MOVD_LOAD_XMM), - ); - e.enc_both( - bitcast.bind(I32).bind(F32), - rec_rfumr.opcodes(&MOVD_STORE_XMM), - ); - - // movq - e.enc64( - bitcast.bind(F64).bind(I64), - rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), - ); - e.enc64( - bitcast.bind(I64).bind(F64), - rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(), - ); - - // movaps - e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD)); - e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD)); - - // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit - // immediate operands with the current constraint language. - e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD)); - e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); - - // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit - // immediate operands with the current constraint language. - e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD)); - e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); - // cvtsi2ss e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS)); @@ -1711,6 +1284,156 @@ pub(crate) fn define( e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS)); e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD)); + // Comparisons. + // + // This only covers the condition codes in `supported_floatccs`, the rest are + // handled by legalization patterns. + e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS)); + e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD)); + e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS)); + e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD)); +} + +#[inline(never)] +fn define_alu( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let clz = shared.by_name("clz"); + let ctz = shared.by_name("ctz"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let ifcmp = shared.by_name("ifcmp"); + let ifcmp_imm = shared.by_name("ifcmp_imm"); + let ifcmp_sp = shared.by_name("ifcmp_sp"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let popcnt = shared.by_name("popcnt"); + let rotl = shared.by_name("rotl"); + let rotl_imm = shared.by_name("rotl_imm"); + let rotr = shared.by_name("rotr"); + let rotr_imm = shared.by_name("rotr_imm"); + let selectif = shared.by_name("selectif"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let trueff = shared.by_name("trueff"); + let trueif = shared.by_name("trueif"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let x86_bsf = x86.by_name("x86_bsf"); + let x86_bsr = x86.by_name("x86_bsr"); + + // Shorthands for recipes. 
+ let rec_bsf_and_bsr = r.template("bsf_and_bsr"); + let rec_cmov = r.template("cmov"); + let rec_icscc = r.template("icscc"); + let rec_icscc_ib = r.template("icscc_ib"); + let rec_icscc_id = r.template("icscc_id"); + let rec_rcmp = r.template("rcmp"); + let rec_rcmp_ib = r.template("rcmp_ib"); + let rec_rcmp_id = r.template("rcmp_id"); + let rec_rcmp_sp = r.template("rcmp_sp"); + let rec_rc = r.template("rc"); + let rec_setf_abcd = r.template("setf_abcd"); + let rec_seti_abcd = r.template("seti_abcd"); + let rec_urm = r.template("urm"); + + // Predicates shorthands. + let use_popcnt = settings.predicate_by_name("use_popcnt"); + let use_lzcnt = settings.predicate_by_name("use_lzcnt"); + let use_bmi1 = settings.predicate_by_name("use_bmi1"); + + let band = shared.by_name("band"); + let band_imm = shared.by_name("band_imm"); + let band_not = shared.by_name("band_not"); + let bnot = shared.by_name("bnot"); + let bor = shared.by_name("bor"); + let bor_imm = shared.by_name("bor_imm"); + let bxor = shared.by_name("bxor"); + let bxor_imm = shared.by_name("bxor_imm"); + let iadd = shared.by_name("iadd"); + let iadd_ifcarry = shared.by_name("iadd_ifcarry"); + let iadd_ifcin = shared.by_name("iadd_ifcin"); + let iadd_ifcout = shared.by_name("iadd_ifcout"); + let iadd_imm = shared.by_name("iadd_imm"); + let imul = shared.by_name("imul"); + let isub = shared.by_name("isub"); + let isub_ifbin = shared.by_name("isub_ifbin"); + let isub_ifborrow = shared.by_name("isub_ifborrow"); + let isub_ifbout = shared.by_name("isub_ifbout"); + let x86_sdivmodx = x86.by_name("x86_sdivmodx"); + let x86_smulx = x86.by_name("x86_smulx"); + let x86_udivmodx = x86.by_name("x86_udivmodx"); + let x86_umulx = x86.by_name("x86_umulx"); + + let rec_div = r.template("div"); + let rec_fa = r.template("fa"); + let rec_fax = r.template("fax"); + let rec_mulx = r.template("mulx"); + let rec_r_ib = r.template("r_ib"); + let rec_r_id = r.template("r_id"); + let rec_rin = r.template("rin"); + let rec_rio = r.template("rio"); + let rec_rout = r.template("rout"); + let rec_rr = r.template("rr"); + let rec_rrx = r.template("rrx"); + let rec_ur = r.template("ur"); + + e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC)); + e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC)); + e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0)); + e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0)); + + e.enc_i32_i64(isub, rec_rr.opcodes(&SUB)); + e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB)); + e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB)); + e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB)); + + e.enc_i32_i64(band, rec_rr.opcodes(&AND)); + e.enc_b32_b64(band, rec_rr.opcodes(&AND)); + + // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can + // even use the single-byte immediate for 0xffff_ffXX masks. 
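A small illustration of why that TODO is sound (added for context, not part of the patch): a 32-bit x86-64 operation zero-extends its result into the upper half of the destination register, so an i64 AND whose mask has a zero upper half behaves exactly like the i32 form, and masks of the shape 0x0000_0000_ffff_ffXX would even fit the 32-bit instruction's sign-extended 8-bit immediate:

    fn main() {
        let x: u64 = 0x1122_3344_5566_7788;
        let mask: u64 = 0x0000_0000_ffff_fff0; // upper 32 bits are zero
        let wide = x & mask; // band_imm.i64
        let narrow = ((x as u32) & (mask as u32)) as u64; // band_imm.i32 + implicit zero-extension
        assert_eq!(wide, narrow);
    }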
+
+ e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4));
+ e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4));
+
+ e.enc_i32_i64(bor, rec_rr.opcodes(&OR));
+ e.enc_b32_b64(bor, rec_rr.opcodes(&OR));
+ e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1));
+ e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1));
+
+ e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR));
+ e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR));
+ e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6));
+ e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6));
+
+ // x86 has a dedicated bitwise NOT instruction.
+ e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
+ e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));
+
+ // Also add `b1` encodings for the logic instructions.
+ // TODO: Should this be done with 8-bit instructions? It would improve partial register
+ // dependencies.
+ e.enc_both(band.bind(B1), rec_rr.opcodes(&AND));
+ e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR));
+ e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR));
+
+ e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL));
+ e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7));
+ e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6));
+
+ e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5));
+ e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4));
+
 // Binary bitwise ops.
 //
 // The F64 version is intentionally encoded using the single-precision opcode:
@@ -1728,14 +1451,182 @@
 e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS));
 e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS));

- // Comparisons.
- //
- // This only covers the condition codes in `supported_floatccs`, the rest are
- // handled by legalization patterns.
- e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS));
- e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD));
- e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS));
- e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD));
+ // Shifts and rotates.
+ // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
+ // and 16-bit shifts would need explicit masking.
+
+ for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
+ // Cannot use enc_i32_i64 for this pattern because these instructions also
+ // need to bind the shift-amount type to `Any`.
+ e.enc32(
+ inst.bind(I32).bind(Any),
+ rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
+ );
+ e.enc64(
+ inst.bind(I64).bind(Any),
+ rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(),
+ );
+ e.enc64(
+ inst.bind(I32).bind(Any),
+ rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(),
+ );
+ e.enc64(
+ inst.bind(I32).bind(Any),
+ rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
+ );
+ }
+
+ e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0));
+ e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1));
+ e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4));
+ e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5));
+ e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7));
+
+ // Population count.
+ e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
+ e.enc64_isap(
+ popcnt.bind(I64),
+ rec_urm.opcodes(&POPCNT).rex().w(),
+ use_popcnt,
+ );
+ e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt);
+ e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
+
+ // Count leading zero bits.
+ e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
+ e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt);
+ e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt);
+ e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
+
+ // Count trailing zero bits.
+ e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
+ e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1);
+ e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1);
+ e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
+
+ // Bit scan forward and reverse.
+ e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD));
+ e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE));
+
+ // Comparisons.
+ e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG));
+ e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7));
+ e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7));
+ e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG));
+ e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7));
+ e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7));
+ // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
+
+ e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG));
+ e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w());
+
+ // Convert flags to bool.
+ // This encodes `b1` as an 8-bit low register with the value 0 or 1.
+ e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
+ e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
+
+ // Conditional move (a.k.a. integer select).
+ e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW));
+}
+
+#[inline(never)]
+fn define_simd(
+ e: &mut PerCpuModeEncodings,
+ shared_defs: &SharedDefinitions,
+ settings: &SettingGroup,
+ x86: &InstructionGroup,
+ r: &RecipeGroup,
+) {
+ let shared = &shared_defs.instructions;
+ let formats = &shared_defs.formats;
+
+ // Shorthands for instructions.
+ let bitcast = shared.by_name("bitcast"); + let bor = shared.by_name("bor"); + let bxor = shared.by_name("bxor"); + let copy = shared.by_name("copy"); + let copy_nop = shared.by_name("copy_nop"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fdiv = shared.by_name("fdiv"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let fmax = shared.by_name("fmax"); + let fmin = shared.by_name("fmin"); + let fmul = shared.by_name("fmul"); + let fsub = shared.by_name("fsub"); + let iadd = shared.by_name("iadd"); + let icmp = shared.by_name("icmp"); + let imul = shared.by_name("imul"); + let ishl_imm = shared.by_name("ishl_imm"); + let load = shared.by_name("load"); + let raw_bitcast = shared.by_name("raw_bitcast"); + let regfill = shared.by_name("regfill"); + let regmove = shared.by_name("regmove"); + let regspill = shared.by_name("regspill"); + let sadd_sat = shared.by_name("sadd_sat"); + let scalar_to_vector = shared.by_name("scalar_to_vector"); + let spill = shared.by_name("spill"); + let sqrt = shared.by_name("sqrt"); + let sshr_imm = shared.by_name("sshr_imm"); + let ssub_sat = shared.by_name("ssub_sat"); + let store = shared.by_name("store"); + let uadd_sat = shared.by_name("uadd_sat"); + let ushr_imm = shared.by_name("ushr_imm"); + let usub_sat = shared.by_name("usub_sat"); + let vconst = shared.by_name("vconst"); + let x86_insertps = x86.by_name("x86_insertps"); + let x86_movlhps = x86.by_name("x86_movlhps"); + let x86_movsd = x86.by_name("x86_movsd"); + let x86_pextr = x86.by_name("x86_pextr"); + let x86_pinsr = x86.by_name("x86_pinsr"); + let x86_pmaxs = x86.by_name("x86_pmaxs"); + let x86_pmaxu = x86.by_name("x86_pmaxu"); + let x86_pmins = x86.by_name("x86_pmins"); + let x86_pminu = x86.by_name("x86_pminu"); + let x86_pshufb = x86.by_name("x86_pshufb"); + let x86_pshufd = x86.by_name("x86_pshufd"); + let x86_psll = x86.by_name("x86_psll"); + let x86_psra = x86.by_name("x86_psra"); + let x86_psrl = x86.by_name("x86_psrl"); + let x86_ptest = x86.by_name("x86_ptest"); + + // Shorthands for recipes. + let rec_f_ib = r.template("f_ib"); + let rec_fa = r.template("fa"); + let rec_fa_ib = r.template("fa_ib"); + let rec_fax = r.template("fax"); + let rec_fcmp = r.template("fcmp"); + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_ffillnull = r.recipe("ffillnull"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_furm = r.template("furm"); + let rec_icscc_fpr = r.template("icscc_fpr"); + let rec_null_fpr = r.recipe("null_fpr"); + let rec_pfcmp = r.template("pfcmp"); + let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr"); + let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr"); + let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r"); + let rec_stacknull = r.recipe("stacknull"); + let rec_vconst = r.template("vconst"); + let rec_vconst_optimized = r.template("vconst_optimized"); + + // Predicates shorthands. 
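+    // The first two lookups are not bound to names here; the funcaddr predicates
+    // are bound and used by define_entity_ref below, not by the SIMD encodings.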
+    settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
+    settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
+    let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
+    let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
+    let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
 
     // SIMD vector size: eventually multiple vector sizes may be supported but for now only
     // SSE-sized vectors are available.
@@ -1976,6 +1867,7 @@ pub(crate) fn define(
     );
 
     // SIMD integer subtraction
+    let isub = shared.by_name("isub");
     for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
         let isub = isub.bind(vector(*ty, sse_vector_size));
         e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
@@ -2010,6 +1902,8 @@ pub(crate) fn define(
     }
 
     // SIMD logical operations
+    let band = shared.by_name("band");
+    let band_not = shared.by_name("band_not");
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         // and
        let band = band.bind(vector(ty, sse_vector_size));
@@ -2148,8 +2042,298 @@ pub(crate) fn define(
         let inst = inst.bind(vector(*ty, sse_vector_size));
         e.enc_both(inst, rec_furm.opcodes(opcodes));
     }
+}
 
-    // Reference type instructions
+#[inline(never)]
+fn define_entity_ref(
+    e: &mut PerCpuModeEncodings,
+    shared_defs: &SharedDefinitions,
+    settings: &SettingGroup,
+    r: &RecipeGroup,
+) {
+    let shared = &shared_defs.instructions;
+    let formats = &shared_defs.formats;
+
+    // Shorthands for instructions.
+    let func_addr = shared.by_name("func_addr");
+    let stack_addr = shared.by_name("stack_addr");
+    let symbol_value = shared.by_name("symbol_value");
+
+    // Shorthands for recipes.
+    let rec_allones_fnaddr4 = r.template("allones_fnaddr4");
+    let rec_allones_fnaddr8 = r.template("allones_fnaddr8");
+    let rec_fnaddr4 = r.template("fnaddr4");
+    let rec_fnaddr8 = r.template("fnaddr8");
+    let rec_got_fnaddr8 = r.template("got_fnaddr8");
+    let rec_got_gvaddr8 = r.template("got_gvaddr8");
+    let rec_gvaddr4 = r.template("gvaddr4");
+    let rec_gvaddr8 = r.template("gvaddr8");
+    let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
+    let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
+    let rec_spaddr4_id = r.template("spaddr4_id");
+    let rec_spaddr8_id = r.template("spaddr8_id");
+
+    // Predicates shorthands.
+    let all_ones_funcaddrs_and_not_is_pic =
+        settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
+    let is_pic = settings.predicate_by_name("is_pic");
+    let not_all_ones_funcaddrs_and_not_is_pic =
+        settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
+    let not_is_pic = settings.predicate_by_name("not_is_pic");
+
+    // Function addresses.
+
+    // Non-PIC, not-all-ones funcaddresses.
+    e.enc32_isap(
+        func_addr.bind(I32),
+        rec_fnaddr4.opcodes(&MOV_IMM),
+        not_all_ones_funcaddrs_and_not_is_pic,
+    );
+    e.enc64_isap(
+        func_addr.bind(I64),
+        rec_fnaddr8.opcodes(&MOV_IMM).rex().w(),
+        not_all_ones_funcaddrs_and_not_is_pic,
+    );
+
+    // Non-PIC, all-ones funcaddresses.
+    e.enc32_isap(
+        func_addr.bind(I32),
+        rec_allones_fnaddr4.opcodes(&MOV_IMM),
+        all_ones_funcaddrs_and_not_is_pic,
+    );
+    e.enc64_isap(
+        func_addr.bind(I64),
+        rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(),
+        all_ones_funcaddrs_and_not_is_pic,
+    );
+
+    // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field.
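+    // This emits a RIP-relative `lea` (e.g. `lea rax, [rip + offset]`), so the
+    // target must be within 32-bit displacement range; colocated functions are
+    // assumed to satisfy that.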
+ let is_colocated_func = + InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref"); + e.enc64_instp( + func_addr.bind(I64), + rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(), + is_colocated_func, + ); + + // 64-bit, non-colocated, PIC. + e.enc64_isap( + func_addr.bind(I64), + rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(), + is_pic, + ); + + // Global addresses. + + // Non-PIC. + e.enc32_isap( + symbol_value.bind(I32), + rec_gvaddr4.opcodes(&MOV_IMM), + not_is_pic, + ); + e.enc64_isap( + symbol_value.bind(I64), + rec_gvaddr8.opcodes(&MOV_IMM).rex().w(), + not_is_pic, + ); + + // PIC, colocated. + e.enc64_func( + symbol_value.bind(I64), + rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(), + |encoding| { + encoding + .isa_predicate(is_pic) + .inst_predicate(InstructionPredicate::new_is_colocated_data(formats)) + }, + ); + + // PIC, non-colocated. + e.enc64_isap( + symbol_value.bind(I64), + rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(), + is_pic, + ); + + // Stack addresses. + // + // TODO: Add encoding rules for stack_load and stack_store, so that they + // don't get legalized to stack_addr + load/store. + e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA)); + e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w()); +} + +/// Control flow opcodes. +#[inline(never)] +fn define_control_flow( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. + let brff = shared.by_name("brff"); + let brif = shared.by_name("brif"); + let brnz = shared.by_name("brnz"); + let brz = shared.by_name("brz"); + let call = shared.by_name("call"); + let call_indirect = shared.by_name("call_indirect"); + let debugtrap = shared.by_name("debugtrap"); + let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); + let jump = shared.by_name("jump"); + let jump_table_base = shared.by_name("jump_table_base"); + let jump_table_entry = shared.by_name("jump_table_entry"); + let return_ = shared.by_name("return"); + let trap = shared.by_name("trap"); + let trapff = shared.by_name("trapff"); + let trapif = shared.by_name("trapif"); + let resumable_trap = shared.by_name("resumable_trap"); + + // Shorthands for recipes. + let rec_brfb = r.template("brfb"); + let rec_brfd = r.template("brfd"); + let rec_brib = r.template("brib"); + let rec_brid = r.template("brid"); + let rec_call_id = r.template("call_id"); + let rec_call_plt_id = r.template("call_plt_id"); + let rec_call_r = r.template("call_r"); + let rec_debugtrap = r.recipe("debugtrap"); + let rec_indirect_jmp = r.template("indirect_jmp"); + let rec_jmpb = r.template("jmpb"); + let rec_jmpd = r.template("jmpd"); + let rec_jt_base = r.template("jt_base"); + let rec_jt_entry = r.template("jt_entry"); + let rec_ret = r.template("ret"); + let rec_t8jccb_abcd = r.template("t8jccb_abcd"); + let rec_t8jccd_abcd = r.template("t8jccd_abcd"); + let rec_t8jccd_long = r.template("t8jccd_long"); + let rec_tjccb = r.template("tjccb"); + let rec_tjccd = r.template("tjccd"); + let rec_trap = r.template("trap"); + let rec_trapif = r.recipe("trapif"); + let rec_trapff = r.recipe("trapff"); + + // Predicates shorthands. + let is_pic = settings.predicate_by_name("is_pic"); + + // Call/return + + // 32-bit, both PIC and non-PIC. + e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE)); + + // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. 
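+    // That is, a direct `call` with a rel32 displacement; colocated callees are
+    // assumed to be close enough that the displacement always fits.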
+    let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref");
+    e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func);
+
+    // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC
+    // is currently using the large model, which requires that calls be lowered to
+    // func_addr+call_indirect.
+    e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic);
+
+    e.enc32(
+        call_indirect.bind(I32),
+        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
+    );
+    e.enc64(
+        call_indirect.bind(I64),
+        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(),
+    );
+    e.enc64(
+        call_indirect.bind(I64),
+        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
+    );
+
+    e.enc32(return_, rec_ret.opcodes(&RET_NEAR));
+    e.enc64(return_, rec_ret.opcodes(&RET_NEAR));
+
+    // Branches.
+    e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT));
+    e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT));
+    e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));
+    e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));
+
+    e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW));
+    e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW));
+
+    // Not all float condition codes are legal, see `supported_floatccs`.
+    e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW));
+    e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW));
+
+    // Note that the tjccd opcode will be prefixed with 0x0f.
+    e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL));
+    e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&JUMP_NEAR_IF_EQUAL));
+    e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL));
+    e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&JUMP_NEAR_IF_NOT_EQUAL));
+
+    // Branch on a b1 value in a register only looks at the low 8 bits. See also
+    // bint encodings below.
+    //
+    // Start with the worst-case encoding for X86_32 only. The register allocator
+    // can't handle a branch with an ABCD-constrained operand.
+    e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&JUMP_NEAR_IF_EQUAL));
+    e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&JUMP_NEAR_IF_NOT_EQUAL));
+
+    e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL));
+    e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&JUMP_NEAR_IF_EQUAL));
+    e.enc_both(
+        brnz.bind(B1),
+        rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL),
+    );
+    e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&JUMP_NEAR_IF_NOT_EQUAL));
+
+    // Jump tables.
+    e.enc64(
+        jump_table_entry.bind(I64),
+        rec_jt_entry.opcodes(&MOVSXD).rex().w(),
+    );
+    e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD));
+
+    e.enc64(
+        jump_table_base.bind(I64),
+        rec_jt_base.opcodes(&LEA).rex().w(),
+    );
+    e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA));
+
+    e.enc_x86_64(
+        indirect_jump_table_br.bind(I64),
+        rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
+    );
+    e.enc32(
+        indirect_jump_table_br.bind(I32),
+        rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
+    );
+
+    // Trap as ud2
+    e.enc32(trap, rec_trap.opcodes(&UNDEFINED2));
+    e.enc64(trap, rec_trap.opcodes(&UNDEFINED2));
+    e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2));
+    e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2));
+
+    // Debug trap as int3
+    e.enc32_rec(debugtrap, rec_debugtrap, 0);
+    e.enc64_rec(debugtrap, rec_debugtrap, 0);
+
+    e.enc32_rec(trapif, rec_trapif, 0);
+    e.enc64_rec(trapif, rec_trapif, 0);
+    e.enc32_rec(trapff, rec_trapff, 0);
+    e.enc64_rec(trapff, rec_trapff, 0);
+}
+
+/// Reference type instructions.
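+///
+/// Covers `null`, `is_null`, and `safepoint`. Null references are materialized
+/// the same way as `iconst 0`; see the comment below.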
+#[inline(never)] +fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + + let is_null = shared.by_name("is_null"); + let null = shared.by_name("null"); + let safepoint = shared.by_name("safepoint"); + + let rec_is_zero = r.template("is_zero"); + let rec_pu_id_ref = r.template("pu_id_ref"); + let rec_safepoint = r.recipe("safepoint"); // Null references implemented as iconst 0. e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM)); @@ -2163,6 +2347,28 @@ pub(crate) fn define( // safepoint instruction calls sink, no actual encoding. e.enc32_rec(safepoint, rec_safepoint, 0); e.enc64_rec(safepoint, rec_safepoint, 0); +} + +#[allow(clippy::cognitive_complexity)] +pub(crate) fn define( + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) -> PerCpuModeEncodings { + // Definitions. + let mut e = PerCpuModeEncodings::new(); + + define_moves(&mut e, shared_defs, r); + define_memory(&mut e, shared_defs, x86, r); + define_fpu_moves(&mut e, shared_defs, r); + define_fpu_memory(&mut e, shared_defs, r); + define_fpu_ops(&mut e, shared_defs, settings, x86, r); + define_alu(&mut e, shared_defs, settings, x86, r); + define_simd(&mut e, shared_defs, settings, x86, r); + define_entity_ref(&mut e, shared_defs, settings, r); + define_control_flow(&mut e, shared_defs, settings, r); + define_reftypes(&mut e, shared_defs, r); e } diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 388e2cefad..5f9a525353 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -11,6 +11,476 @@ use crate::shared::formats::Formats; use crate::shared::types; use crate::shared::{entities::EntityRefs, immediates::Immediates}; +#[inline(never)] +fn define_control_flow( + ig: &mut InstructionGroupBuilder, + formats: &Formats, + imm: &Immediates, + entities: &EntityRefs, +) { + let EBB = &Operand::new("EBB", &entities.ebb).with_doc("Destination extended basic block"); + let args = &Operand::new("args", &entities.varargs).with_doc("EBB arguments"); + + ig.push( + Inst::new( + "jump", + r#" + Jump. + + Unconditionally jump to an extended basic block, passing the specified + EBB arguments. The number and types of arguments must match the + destination EBB. + "#, + &formats.jump, + ) + .operands_in(vec![EBB, args]) + .is_terminator(true) + .is_branch(true), + ); + + ig.push( + Inst::new( + "fallthrough", + r#" + Fall through to the next EBB. + + This is the same as `jump`, except the destination EBB must be + the next one in the layout. + + Jumps are turned into fall-through instructions by the branch + relaxation pass. There is no reason to use this instruction outside + that pass. + "#, + &formats.jump, + ) + .operands_in(vec![EBB, args]) + .is_terminator(true) + .is_branch(true), + ); + + let Testable = &TypeVar::new( + "Testable", + "A scalar boolean or integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .build(), + ); + + { + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + + ig.push( + Inst::new( + "brz", + r#" + Branch when zero. + + If ``c`` is a `b1` value, take the branch when ``c`` is false. If + ``c`` is an integer value, take the branch when ``c = 0``. 
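+
+            For example, to branch to ``ebb2`` with argument ``v4`` when ``v1``
+            is zero:
+
+            ```text
+                brz v1, ebb2(v4)
+            ```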
+ "#, + &formats.branch, + ) + .operands_in(vec![c, EBB, args]) + .is_branch(true), + ); + + ig.push( + Inst::new( + "brnz", + r#" + Branch when non-zero. + + If ``c`` is a `b1` value, take the branch when ``c`` is true. If + ``c`` is an integer value, take the branch when ``c != 0``. + "#, + &formats.branch, + ) + .operands_in(vec![c, EBB, args]) + .is_branch(true), + ); + } + + let iB = &TypeVar::new( + "iB", + "A scalar integer type", + TypeSetBuilder::new().ints(Interval::All).build(), + ); + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into(); + + { + let Cond = &Operand::new("Cond", &imm.intcc); + let x = &Operand::new("x", iB); + let y = &Operand::new("y", iB); + + ig.push( + Inst::new( + "br_icmp", + r#" + Compare scalar integers and branch. + + Compare ``x`` and ``y`` in the same way as the `icmp` instruction + and take the branch if the condition is true: + + ```text + br_icmp ugt v1, v2, ebb4(v5, v6) + ``` + + is semantically equivalent to: + + ```text + v10 = icmp ugt, v1, v2 + brnz v10, ebb4(v5, v6) + ``` + + Some RISC architectures like MIPS and RISC-V provide instructions that + implement all or some of the condition codes. The instruction can also + be used to represent *macro-op fusion* on architectures like Intel's. + "#, + &formats.branch_icmp, + ) + .operands_in(vec![Cond, x, y, EBB, args]) + .is_branch(true), + ); + + let f = &Operand::new("f", iflags); + + ig.push( + Inst::new( + "brif", + r#" + Branch when condition is true in integer CPU flags. + "#, + &formats.branch_int, + ) + .operands_in(vec![Cond, f, EBB, args]) + .is_branch(true), + ); + } + + { + let Cond = &Operand::new("Cond", &imm.floatcc); + + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "brff", + r#" + Branch when condition is true in floating point CPU flags. + "#, + &formats.branch_float, + ) + .operands_in(vec![Cond, f, EBB, args]) + .is_branch(true), + ); + } + + { + let x = &Operand::new("x", iB).with_doc("index into jump table"); + let JT = &Operand::new("JT", &entities.jump_table); + + ig.push( + Inst::new( + "br_table", + r#" + Indirect branch via jump table. + + Use ``x`` as an unsigned index into the jump table ``JT``. If a jump + table entry is found, branch to the corresponding EBB. If no entry was + found or the index is out-of-bounds, branch to the given default EBB. + + Note that this branch instruction can't pass arguments to the targeted + blocks. Split critical edges as needed to work around this. + + Do not confuse this with "tables" in WebAssembly. ``br_table`` is for + jump tables with destinations within the current function only -- think + of a ``match`` in Rust or a ``switch`` in C. If you want to call a + function in a dynamic library, that will typically use + ``call_indirect``. + "#, + &formats.branch_table, + ) + .operands_in(vec![x, EBB, JT]) + .is_terminator(true) + .is_branch(true), + ); + } + + let iAddr = &TypeVar::new( + "iAddr", + "An integer address type", + TypeSetBuilder::new().ints(32..64).build(), + ); + + { + let x = &Operand::new("x", iAddr).with_doc("index into jump table"); + let addr = &Operand::new("addr", iAddr); + let Size = &Operand::new("Size", &imm.uimm8).with_doc("Size in bytes"); + let JT = &Operand::new("JT", &entities.jump_table); + let entry = &Operand::new("entry", iAddr).with_doc("entry of jump table"); + + ig.push( + Inst::new( + "jump_table_entry", + r#" + Get an entry from a jump table. 
+
+            Load a serialized ``entry`` from a jump table ``JT`` at a given index
+            ``addr`` with a specific ``Size``. The retrieved entry may need to be
+            decoded after loading, depending upon the jump table type used.
+
+            Currently, the only type supported is entries which are relative to the
+            base of the jump table.
+            "#,
+                &formats.branch_table_entry,
+            )
+            .operands_in(vec![x, addr, Size, JT])
+            .operands_out(vec![entry])
+            .can_load(true),
+        );
+
+        ig.push(
+            Inst::new(
+                "jump_table_base",
+                r#"
+            Get the absolute base address of a jump table.
+
+            This is used for jump tables wherein the entries are stored relative to
+            the base of the jump table. In order to use these, generated code should
+            first load an entry using ``jump_table_entry``, then use this instruction
+            to add the relative base back to it.
+            "#,
+                &formats.branch_table_base,
+            )
+            .operands_in(vec![JT])
+            .operands_out(vec![addr]),
+        );
+
+        ig.push(
+            Inst::new(
+                "indirect_jump_table_br",
+                r#"
+            Branch indirectly via a jump table entry.
+
+            Unconditionally jump via a jump table entry that was previously loaded
+            with the ``jump_table_entry`` instruction.
+            "#,
+                &formats.indirect_jump,
+            )
+            .operands_in(vec![addr, JT])
+            .is_indirect_branch(true)
+            .is_terminator(true)
+            .is_branch(true),
+        );
+    }
+
+    ig.push(
+        Inst::new(
+            "debugtrap",
+            r#"
+        Encodes an assembly debug trap.
+        "#,
+            &formats.nullary,
+        )
+        .other_side_effects(true)
+        .can_load(true)
+        .can_store(true),
+    );
+
+    {
+        let code = &Operand::new("code", &imm.trapcode);
+        ig.push(
+            Inst::new(
+                "trap",
+                r#"
+            Terminate execution unconditionally.
+            "#,
+                &formats.trap,
+            )
+            .operands_in(vec![code])
+            .can_trap(true)
+            .is_terminator(true),
+        );
+
+        let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+        ig.push(
+            Inst::new(
+                "trapz",
+                r#"
+            Trap when zero.
+
+            If ``c`` is non-zero, execution continues at the following instruction.
+            "#,
+                &formats.cond_trap,
+            )
+            .operands_in(vec![c, code])
+            .can_trap(true),
+        );
+
+        ig.push(
+            Inst::new(
+                "resumable_trap",
+                r#"
+            A resumable trap.
+
+            This instruction allows non-conditional traps to be used as non-terminal instructions.
+            "#,
+                &formats.trap,
+            )
+            .operands_in(vec![code])
+            .can_trap(true),
+        );
+
+        let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+        ig.push(
+            Inst::new(
+                "trapnz",
+                r#"
+            Trap when non-zero.
+
+            If ``c`` is zero, execution continues at the following instruction.
+            "#,
+                &formats.cond_trap,
+            )
+            .operands_in(vec![c, code])
+            .can_trap(true),
+        );
+
+        let Cond = &Operand::new("Cond", &imm.intcc);
+        let f = &Operand::new("f", iflags);
+        ig.push(
+            Inst::new(
+                "trapif",
+                r#"
+            Trap when condition is true in integer CPU flags.
+            "#,
+                &formats.int_cond_trap,
+            )
+            .operands_in(vec![Cond, f, code])
+            .can_trap(true),
+        );
+
+        let Cond = &Operand::new("Cond", &imm.floatcc);
+        let f = &Operand::new("f", fflags);
+        let code = &Operand::new("code", &imm.trapcode);
+        ig.push(
+            Inst::new(
+                "trapff",
+                r#"
+            Trap when condition is true in floating point CPU flags.
+            "#,
+                &formats.float_cond_trap,
+            )
+            .operands_in(vec![Cond, f, code])
+            .can_trap(true),
+        );
+    }
+
+    let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values");
+    ig.push(
+        Inst::new(
+            "return",
+            r#"
+        Return from the function.
+
+        Unconditionally transfer control to the calling function, passing the
+        provided return values. The list of return values must match the
+        function signature's return types.
+ "#, + &formats.multiary, + ) + .operands_in(vec![rvals]) + .is_return(true) + .is_terminator(true), + ); + + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "fallthrough_return", + r#" + Return from the function by fallthrough. + + This is a specialized instruction for use where one wants to append + a custom epilogue, which will then perform the real return. This + instruction has no encoding. + "#, + &formats.multiary, + ) + .operands_in(vec![rvals]) + .is_return(true) + .is_terminator(true), + ); + + let FN = &Operand::new("FN", &entities.func_ref) + .with_doc("function to call, declared by `function`"); + let args = &Operand::new("args", &entities.varargs).with_doc("call arguments"); + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "call", + r#" + Direct function call. + + Call a function which has been declared in the preamble. The argument + types must match the function's signature. + "#, + &formats.call, + ) + .operands_in(vec![FN, args]) + .operands_out(vec![rvals]) + .is_call(true), + ); + + let SIG = &Operand::new("SIG", &entities.sig_ref).with_doc("function signature"); + let callee = &Operand::new("callee", iAddr).with_doc("address of function to call"); + let args = &Operand::new("args", &entities.varargs).with_doc("call arguments"); + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "call_indirect", + r#" + Indirect function call. + + Call the function pointed to by `callee` with the given arguments. The + called function must match the specified signature. + + Note that this is different from WebAssembly's ``call_indirect``; the + callee is a native address, rather than a table index. For WebAssembly, + `table_addr` and `load` are used to obtain a native address + from a table. + "#, + &formats.call_indirect, + ) + .operands_in(vec![SIG, callee, args]) + .operands_out(vec![rvals]) + .is_call(true), + ); + + let FN = &Operand::new("FN", &entities.func_ref) + .with_doc("function to call, declared by `function`"); + let addr = &Operand::new("addr", iAddr); + ig.push( + Inst::new( + "func_addr", + r#" + Get the address of a function. + + Compute the absolute address of a function declared in the preamble. + The returned address can be used as a ``callee`` argument to + `call_indirect`. This is also a method for calling functions that + are too far away to be addressable by a direct `call` + instruction. + "#, + &formats.func_addr, + ) + .operands_in(vec![FN]) + .operands_out(vec![addr]), + ); +} + #[allow(clippy::many_single_char_names)] pub(crate) fn define( all_instructions: &mut AllInstructions, @@ -20,6 +490,8 @@ pub(crate) fn define( ) -> InstructionGroup { let mut ig = InstructionGroupBuilder::new(all_instructions); + define_control_flow(&mut ig, formats, imm, entities); + // Operand kind shorthands. 
let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into(); @@ -114,426 +586,6 @@ pub(crate) fn define( let MemTo = &TypeVar::copy_from(Mem, "MemTo".to_string()); let addr = &Operand::new("addr", iAddr); - let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); - let Cond = &Operand::new("Cond", &imm.intcc); - let x = &Operand::new("x", iB); - let y = &Operand::new("y", iB); - let EBB = &Operand::new("EBB", &entities.ebb).with_doc("Destination extended basic block"); - let args = &Operand::new("args", &entities.varargs).with_doc("EBB arguments"); - - ig.push( - Inst::new( - "jump", - r#" - Jump. - - Unconditionally jump to an extended basic block, passing the specified - EBB arguments. The number and types of arguments must match the - destination EBB. - "#, - &formats.jump, - ) - .operands_in(vec![EBB, args]) - .is_terminator(true) - .is_branch(true), - ); - - ig.push( - Inst::new( - "fallthrough", - r#" - Fall through to the next EBB. - - This is the same as `jump`, except the destination EBB must be - the next one in the layout. - - Jumps are turned into fall-through instructions by the branch - relaxation pass. There is no reason to use this instruction outside - that pass. - "#, - &formats.jump, - ) - .operands_in(vec![EBB, args]) - .is_terminator(true) - .is_branch(true), - ); - - ig.push( - Inst::new( - "brz", - r#" - Branch when zero. - - If ``c`` is a `b1` value, take the branch when ``c`` is false. If - ``c`` is an integer value, take the branch when ``c = 0``. - "#, - &formats.branch, - ) - .operands_in(vec![c, EBB, args]) - .is_branch(true), - ); - - ig.push( - Inst::new( - "brnz", - r#" - Branch when non-zero. - - If ``c`` is a `b1` value, take the branch when ``c`` is true. If - ``c`` is an integer value, take the branch when ``c != 0``. - "#, - &formats.branch, - ) - .operands_in(vec![c, EBB, args]) - .is_branch(true), - ); - - ig.push( - Inst::new( - "br_icmp", - r#" - Compare scalar integers and branch. - - Compare ``x`` and ``y`` in the same way as the `icmp` instruction - and take the branch if the condition is true: - - ```text - br_icmp ugt v1, v2, ebb4(v5, v6) - ``` - - is semantically equivalent to: - - ```text - v10 = icmp ugt, v1, v2 - brnz v10, ebb4(v5, v6) - ``` - - Some RISC architectures like MIPS and RISC-V provide instructions that - implement all or some of the condition codes. The instruction can also - be used to represent *macro-op fusion* on architectures like Intel's. - "#, - &formats.branch_icmp, - ) - .operands_in(vec![Cond, x, y, EBB, args]) - .is_branch(true), - ); - - let f = &Operand::new("f", iflags); - - ig.push( - Inst::new( - "brif", - r#" - Branch when condition is true in integer CPU flags. - "#, - &formats.branch_int, - ) - .operands_in(vec![Cond, f, EBB, args]) - .is_branch(true), - ); - - let Cond = &Operand::new("Cond", &imm.floatcc); - let f = &Operand::new("f", fflags); - - ig.push( - Inst::new( - "brff", - r#" - Branch when condition is true in floating point CPU flags. - "#, - &formats.branch_float, - ) - .operands_in(vec![Cond, f, EBB, args]) - .is_branch(true), - ); - - // The index into the br_table can be any type; legalizer will convert it to the right type. 
- let x = &Operand::new("x", iB).with_doc("index into jump table"); - let entry = &Operand::new("entry", iAddr).with_doc("entry of jump table"); - let JT = &Operand::new("JT", &entities.jump_table); - - ig.push( - Inst::new( - "br_table", - r#" - Indirect branch via jump table. - - Use ``x`` as an unsigned index into the jump table ``JT``. If a jump - table entry is found, branch to the corresponding EBB. If no entry was - found or the index is out-of-bounds, branch to the given default EBB. - - Note that this branch instruction can't pass arguments to the targeted - blocks. Split critical edges as needed to work around this. - - Do not confuse this with "tables" in WebAssembly. ``br_table`` is for - jump tables with destinations within the current function only -- think - of a ``match`` in Rust or a ``switch`` in C. If you want to call a - function in a dynamic library, that will typically use - ``call_indirect``. - "#, - &formats.branch_table, - ) - .operands_in(vec![x, EBB, JT]) - .is_terminator(true) - .is_branch(true), - ); - - // These are the instructions which br_table legalizes to: they perform address computations, - // using pointer-sized integers, so their type variables are more constrained. - let x = &Operand::new("x", iAddr).with_doc("index into jump table"); - let Size = &Operand::new("Size", &imm.uimm8).with_doc("Size in bytes"); - - ig.push( - Inst::new( - "jump_table_entry", - r#" - Get an entry from a jump table. - - Load a serialized ``entry`` from a jump table ``JT`` at a given index - ``addr`` with a specific ``Size``. The retrieved entry may need to be - decoded after loading, depending upon the jump table type used. - - Currently, the only type supported is entries which are relative to the - base of the jump table. - "#, - &formats.branch_table_entry, - ) - .operands_in(vec![x, addr, Size, JT]) - .operands_out(vec![entry]) - .can_load(true), - ); - - ig.push( - Inst::new( - "jump_table_base", - r#" - Get the absolute base address of a jump table. - - This is used for jump tables wherein the entries are stored relative to - the base of jump table. In order to use these, generated code should first - load an entry using ``jump_table_entry``, then use this instruction to add - the relative base back to it. - "#, - &formats.branch_table_base, - ) - .operands_in(vec![JT]) - .operands_out(vec![addr]), - ); - - ig.push( - Inst::new( - "indirect_jump_table_br", - r#" - Branch indirectly via a jump table entry. - - Unconditionally jump via a jump table entry that was previously loaded - with the ``jump_table_entry`` instruction. - "#, - &formats.indirect_jump, - ) - .operands_in(vec![addr, JT]) - .is_indirect_branch(true) - .is_terminator(true) - .is_branch(true), - ); - - ig.push( - Inst::new( - "debugtrap", - r#" - Encodes an assembly debug trap. - "#, - &formats.nullary, - ) - .other_side_effects(true) - .can_load(true) - .can_store(true), - ); - - let code = &Operand::new("code", &imm.trapcode); - - ig.push( - Inst::new( - "trap", - r#" - Terminate execution unconditionally. - "#, - &formats.trap, - ) - .operands_in(vec![code]) - .can_trap(true) - .is_terminator(true), - ); - - ig.push( - Inst::new( - "trapz", - r#" - Trap when zero. - - if ``c`` is non-zero, execution continues at the following instruction. - "#, - &formats.cond_trap, - ) - .operands_in(vec![c, code]) - .can_trap(true), - ); - - ig.push( - Inst::new( - "resumable_trap", - r#" - A resumable trap. - - This instruction allows non-conditional traps to be used as non-terminal instructions. 
- "#, - &formats.trap, - ) - .operands_in(vec![code]) - .can_trap(true), - ); - - ig.push( - Inst::new( - "trapnz", - r#" - Trap when non-zero. - - if ``c`` is zero, execution continues at the following instruction. - "#, - &formats.cond_trap, - ) - .operands_in(vec![c, code]) - .can_trap(true), - ); - - let Cond = &Operand::new("Cond", &imm.intcc); - let f = &Operand::new("f", iflags); - - ig.push( - Inst::new( - "trapif", - r#" - Trap when condition is true in integer CPU flags. - "#, - &formats.int_cond_trap, - ) - .operands_in(vec![Cond, f, code]) - .can_trap(true), - ); - - let Cond = &Operand::new("Cond", &imm.floatcc); - let f = &Operand::new("f", fflags); - - ig.push( - Inst::new( - "trapff", - r#" - Trap when condition is true in floating point CPU flags. - "#, - &formats.float_cond_trap, - ) - .operands_in(vec![Cond, f, code]) - .can_trap(true), - ); - - let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); - - ig.push( - Inst::new( - "return", - r#" - Return from the function. - - Unconditionally transfer control to the calling function, passing the - provided return values. The list of return values must match the - function signature's return types. - "#, - &formats.multiary, - ) - .operands_in(vec![rvals]) - .is_return(true) - .is_terminator(true), - ); - - ig.push( - Inst::new( - "fallthrough_return", - r#" - Return from the function by fallthrough. - - This is a specialized instruction for use where one wants to append - a custom epilogue, which will then perform the real return. This - instruction has no encoding. - "#, - &formats.multiary, - ) - .operands_in(vec![rvals]) - .is_return(true) - .is_terminator(true), - ); - - let FN = &Operand::new("FN", &entities.func_ref) - .with_doc("function to call, declared by `function`"); - let args = &Operand::new("args", &entities.varargs).with_doc("call arguments"); - - ig.push( - Inst::new( - "call", - r#" - Direct function call. - - Call a function which has been declared in the preamble. The argument - types must match the function's signature. - "#, - &formats.call, - ) - .operands_in(vec![FN, args]) - .operands_out(vec![rvals]) - .is_call(true), - ); - - let SIG = &Operand::new("SIG", &entities.sig_ref).with_doc("function signature"); - let callee = &Operand::new("callee", iAddr).with_doc("address of function to call"); - - ig.push( - Inst::new( - "call_indirect", - r#" - Indirect function call. - - Call the function pointed to by `callee` with the given arguments. The - called function must match the specified signature. - - Note that this is different from WebAssembly's ``call_indirect``; the - callee is a native address, rather than a table index. For WebAssembly, - `table_addr` and `load` are used to obtain a native address - from a table. - "#, - &formats.call_indirect, - ) - .operands_in(vec![SIG, callee, args]) - .operands_out(vec![rvals]) - .is_call(true), - ); - - ig.push( - Inst::new( - "func_addr", - r#" - Get the address of a function. - - Compute the absolute address of a function declared in the preamble. - The returned address can be used as a ``callee`` argument to - `call_indirect`. This is also a method for calling functions that - are too far away to be addressable by a direct `call` - instruction. - "#, - &formats.func_addr, - ) - .operands_in(vec![FN]) - .operands_out(vec![addr]), - ); let SS = &Operand::new("SS", &entities.stack_slot); let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address");