diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index fc385d512a..01f25d9d7f 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -17,6 +17,8 @@ use crate::shared::types::Int::{I16, I32, I64, I8}; use crate::shared::types::Reference::{R32, R64}; use crate::shared::Definitions as SharedDefinitions; +use crate::isa::x86::opcodes::*; + use super::recipes::{RecipeGroup, Template}; pub(crate) struct PerCpuModeEncodings { @@ -681,91 +683,91 @@ pub(crate) fn define( e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0); e.enc_x86_64( set_pinned_reg.bind(I64), - rec_set_pinned_reg.opcodes(vec![0x89]).rex().w(), + rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(), ); - e.enc_i32_i64(iadd, rec_rr.opcodes(vec![0x01])); - e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(vec![0x01])); - e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(vec![0x11])); - e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(vec![0x11])); + e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC)); + e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC)); - e.enc_i32_i64(isub, rec_rr.opcodes(vec![0x29])); - e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(vec![0x29])); - e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(vec![0x19])); - e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(vec![0x19])); + e.enc_i32_i64(isub, rec_rr.opcodes(&SUB)); + e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB)); + e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB)); + e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB)); - e.enc_i32_i64(band, rec_rr.opcodes(vec![0x21])); - e.enc_b32_b64(band, rec_rr.opcodes(vec![0x21])); - e.enc_i32_i64(bor, rec_rr.opcodes(vec![0x09])); - e.enc_b32_b64(bor, rec_rr.opcodes(vec![0x09])); - e.enc_i32_i64(bxor, rec_rr.opcodes(vec![0x31])); - e.enc_b32_b64(bxor, rec_rr.opcodes(vec![0x31])); + e.enc_i32_i64(band, rec_rr.opcodes(&AND)); + e.enc_b32_b64(band, rec_rr.opcodes(&AND)); + e.enc_i32_i64(bor, rec_rr.opcodes(&OR)); + e.enc_b32_b64(bor, rec_rr.opcodes(&OR)); + e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR)); + e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR)); // x86 has a bitwise not instruction NOT. - e.enc_i32_i64(bnot, rec_ur.opcodes(vec![0xf7]).rrr(2)); - e.enc_b32_b64(bnot, rec_ur.opcodes(vec![0xf7]).rrr(2)); + e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2)); + e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2)); // Also add a `b1` encodings for the logic instructions. // TODO: Should this be done with 8-bit instructions? It would improve partial register // dependencies. 
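An aside on the mechanical change running through these hunks: `vec![..]` opcode literals become borrows of named statics. A minimal sketch of the pattern, using a hypothetical `Template` type rather than the real cranelift-codegen-meta builder; the point is that `&ADC` is a `&'static [u8]`, so no per-call-site allocation remains:

```rust
// Sketch only: stand-ins for the statics in opcodes.rs and the recipe
// template builder they feed. Field and method names are illustrative.
pub static ADD: [u8; 1] = [0x01];
pub static ADC: [u8; 1] = [0x11];

#[derive(Clone)]
struct Template {
    opcodes: &'static [u8], // borrowed once, never reallocated
    rrr: u8,                // ModR/M reg-field bits, set via .rrr(n)
    rex_w: bool,
}

impl Template {
    fn opcodes(bytes: &'static [u8]) -> Self {
        Template { opcodes: bytes, rrr: 0, rex_w: false }
    }
    fn rrr(mut self, bits: u8) -> Self { self.rrr = bits; self }
    fn w(mut self) -> Self { self.rex_w = true; self }
}

fn main() {
    // Mirrors a call like `rec_rin.opcodes(&ADC)` above: a &[u8; 1]
    // coerces to &'static [u8], so opcode statics of different lengths
    // all fit the same parameter type.
    let t = Template::opcodes(&ADC).rrr(2).w();
    assert_eq!(t.opcodes, &[0x11]);
}
```

The `b1` encodings mentioned in the comment above continue below.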
- e.enc_both(band.bind(B1), rec_rr.opcodes(vec![0x21])); - e.enc_both(bor.bind(B1), rec_rr.opcodes(vec![0x09])); - e.enc_both(bxor.bind(B1), rec_rr.opcodes(vec![0x31])); + e.enc_both(band.bind(B1), rec_rr.opcodes(&AND)); + e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR)); + e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR)); - e.enc_i32_i64(imul, rec_rrx.opcodes(vec![0x0f, 0xaf])); - e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(vec![0xf7]).rrr(7)); - e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(vec![0xf7]).rrr(6)); + e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL)); + e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7)); + e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6)); - e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(vec![0xf7]).rrr(5)); - e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(vec![0xf7]).rrr(4)); + e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5)); + e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4)); - e.enc_i32_i64(copy, rec_umr.opcodes(vec![0x89])); - e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(vec![0x89])); - e.enc_both(copy.bind(B1), rec_umr.opcodes(vec![0x89])); - e.enc_both(copy.bind(I8), rec_umr.opcodes(vec![0x89])); - e.enc_both(copy.bind(I16), rec_umr.opcodes(vec![0x89])); + e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE)); + e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE)); + e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE)); + e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE)); // TODO For x86-64, only define REX forms for now, since we can't describe the // special regunit immediate operands with the current constraint language. for &ty in &[I8, I16, I32] { - e.enc32(regmove.bind(ty), rec_rmov.opcodes(vec![0x89])); - e.enc64(regmove.bind(ty), rec_rmov.opcodes(vec![0x89]).rex()); + e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); } for &ty in &[B8, B16, B32] { - e.enc32(regmove.bind(ty), rec_rmov.opcodes(vec![0x89])); - e.enc64(regmove.bind(ty), rec_rmov.opcodes(vec![0x89]).rex()); + e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); } - e.enc64(regmove.bind(I64), rec_rmov.opcodes(vec![0x89]).rex().w()); - e.enc64(regmove.bind(B64), rec_rmov.opcodes(vec![0x89]).rex().w()); - e.enc_both(regmove.bind(B1), rec_rmov.opcodes(vec![0x89])); - e.enc_both(regmove.bind(I8), rec_rmov.opcodes(vec![0x89])); - e.enc32(regmove.bind_ref(R32), rec_rmov.opcodes(vec![0x89])); - e.enc64(regmove.bind_ref(R32), rec_rmov.opcodes(vec![0x89]).rex()); + e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w()); + e.enc64(regmove.bind(B64), rec_rmov.opcodes(&MOV_STORE).rex().w()); + e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE)); + e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE)); + e.enc32(regmove.bind_ref(R32), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind_ref(R32), rec_rmov.opcodes(&MOV_STORE).rex()); e.enc64( regmove.bind_ref(R64), - rec_rmov.opcodes(vec![0x89]).rex().w(), + rec_rmov.opcodes(&MOV_STORE).rex().w(), ); - e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(vec![0x83]).rrr(0)); - e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(vec![0x81]).rrr(0)); + e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0)); + e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0)); - e.enc_i32_i64(band_imm, rec_r_ib.opcodes(vec![0x83]).rrr(4)); - e.enc_i32_i64(band_imm, rec_r_id.opcodes(vec![0x81]).rrr(4)); + 
e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4)); + e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4)); - e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(vec![0x83]).rrr(1)); - e.enc_i32_i64(bor_imm, rec_r_id.opcodes(vec![0x81]).rrr(1)); + e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1)); + e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1)); - e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(vec![0x83]).rrr(6)); - e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(vec![0x81]).rrr(6)); + e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6)); + e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6)); // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can // even use the single-byte immediate for 0xffff_ffXX masks. // Immediate constants. - e.enc32(iconst.bind(I32), rec_pu_id.opcodes(vec![0xb8])); + e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); - e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(vec![0xb8])); - e.enc64(iconst.bind(I32), rec_pu_id.opcodes(vec![0xb8])); + e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM)); + e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); // The 32-bit immediate movl also zero-extends to 64 bits. let f_unary_imm = formats.get(formats.by_name("UnaryImm")); @@ -773,34 +775,32 @@ pub(crate) fn define( e.enc64_func( iconst.bind(I64), - rec_pu_id.opcodes(vec![0xb8]).rex(), + rec_pu_id.opcodes(&MOV_IMM).rex(), |encoding| encoding.inst_predicate(is_unsigned_int32.clone()), ); - e.enc64_func( - iconst.bind(I64), - rec_pu_id.opcodes(vec![0xb8]), - |encoding| encoding.inst_predicate(is_unsigned_int32), - ); + e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| { + encoding.inst_predicate(is_unsigned_int32) + }); // Sign-extended 32-bit immediate. e.enc64( iconst.bind(I64), - rec_u_id.rex().opcodes(vec![0xc7]).rrr(0).w(), + rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(), ); - // Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. - e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(vec![0xb8]).rex().w()); + // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix. + e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w()); // Bool constants (uses MOV) for &ty in &[B1, B8, B16, B32] { - e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(vec![0xb8])); + e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM)); } - e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(vec![0xb8]).rex()); + e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex()); let is_zero_int = InstructionPredicate::new_is_zero_int(f_unary_imm, "imm"); e.enc_both_instp( iconst.bind(I8), - rec_u_id_z.opcodes(vec![0x30]), + rec_u_id_z.opcodes(&XORB), is_zero_int.clone(), ); // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that @@ -812,19 +812,15 @@ pub(crate) fn define( // an appropriate i16 encoding available. e.enc_both_instp( iconst.bind(I16), - rec_u_id_z.opcodes(vec![0x31]), + rec_u_id_z.opcodes(&XOR), is_zero_int.clone(), ); e.enc_both_instp( iconst.bind(I32), - rec_u_id_z.opcodes(vec![0x31]), + rec_u_id_z.opcodes(&XOR), is_zero_int.clone(), ); - e.enc_x86_64_instp( - iconst.bind(I64), - rec_u_id_z.opcodes(vec![0x31]), - is_zero_int, - ); + e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int); // Shifts and rotates. 
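The shift and rotate hunks below reuse just two constants for five instructions. A short illustration of why: 0xd3 (shift by CL) and 0xc1 (shift by imm8) are x86 "group 2" opcodes, and the ModR/M reg field, the value passed to `.rrr(n)`, selects the operation.

```rust
// Group-2 reg-field values per the x86 manual: 0=rol, 1=ror, 4=shl,
// 5=shr, 7=sar. This helper builds a register-direct ModR/M byte.
fn modrm_reg_direct(rrr: u8, rm: u8) -> u8 {
    0b1100_0000 | ((rrr & 7) << 3) | (rm & 7) // mod=11: register operand
}

fn main() {
    // `shl eax, 1` assembles as C1 /4 ib: opcode, ModR/M, immediate.
    assert_eq!([0xc1, modrm_reg_direct(4, 0), 0x01], [0xc1, 0xe0, 0x01]);
}
```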
// Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit @@ -835,97 +831,49 @@ pub(crate) fn define( // to bind any. e.enc32( inst.bind(I32).bind_any(), - rec_rc.opcodes(vec![0xd3]).rrr(rrr), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr), ); e.enc64( inst.bind(I64).bind_any(), - rec_rc.opcodes(vec![0xd3]).rrr(rrr).rex().w(), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(), ); e.enc64( inst.bind(I32).bind_any(), - rec_rc.opcodes(vec![0xd3]).rrr(rrr).rex(), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(), ); e.enc64( inst.bind(I32).bind_any(), - rec_rc.opcodes(vec![0xd3]).rrr(rrr), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr), ); } - for &(inst, rrr) in &[ - (rotl_imm, 0), - (rotr_imm, 1), - (ishl_imm, 4), - (ushr_imm, 5), - (sshr_imm, 7), - ] { - e.enc_i32_i64(inst, rec_r_ib.opcodes(vec![0xc1]).rrr(rrr)); - } + e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0)); + e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1)); + e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4)); + e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5)); + e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7)); // Population count. - e.enc32_isap( - popcnt.bind(I32), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]), - use_popcnt, - ); + e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); e.enc64_isap( popcnt.bind(I64), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]).rex().w(), - use_popcnt, - ); - e.enc64_isap( - popcnt.bind(I32), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]).rex(), - use_popcnt, - ); - e.enc64_isap( - popcnt.bind(I32), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]), + rec_urm.opcodes(&POPCNT).rex().w(), use_popcnt, ); + e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt); + e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); // Count leading zero bits. - e.enc32_isap( - clz.bind(I32), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]), - use_lzcnt, - ); - e.enc64_isap( - clz.bind(I64), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]).rex().w(), - use_lzcnt, - ); - e.enc64_isap( - clz.bind(I32), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]).rex(), - use_lzcnt, - ); - e.enc64_isap( - clz.bind(I32), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]), - use_lzcnt, - ); + e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); + e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt); + e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt); + e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); // Count trailing zero bits. - e.enc32_isap( - ctz.bind(I32), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]), - use_bmi1, - ); - e.enc64_isap( - ctz.bind(I64), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]).rex().w(), - use_bmi1, - ); - e.enc64_isap( - ctz.bind(I32), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]).rex(), - use_bmi1, - ); - e.enc64_isap( - ctz.bind(I32), - rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]), - use_bmi1, - ); + e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); + e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1); + e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1); + e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); // Loads and stores. 
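A detail worth noticing in the load/store hunks that follow (and in the POPCNT/LZCNT/TZCNT encodings above): the named constants bundle mandatory prefix bytes together with the opcode proper, so they are prefix-inclusive byte strings rather than bare opcodes. The constants below are copied from the patch's opcodes.rs:

```rust
pub static MOV_STORE: [u8; 1] = [0x89];
pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89]; // 0x66 operand-size prefix
pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8]; // 0xf3 prefix + 0x0f map

fn main() {
    // The 16-bit store is the 32-bit store behind a prefix byte.
    assert_eq!(MOV_STORE_16[1..], MOV_STORE[..]);
    assert_eq!(POPCNT[0], 0xf3);
}
```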
let f_load_complex = formats.get(formats.by_name("LoadComplex")); @@ -934,41 +882,41 @@ pub(crate) fn define( for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] { e.enc_i32_i64_instp( load_complex, - recipe.opcodes(vec![0x8b]), + recipe.opcodes(&MOV_LOAD), is_load_complex_length_two.clone(), ); e.enc_x86_64_instp( uload32_complex, - recipe.opcodes(vec![0x8b]), + recipe.opcodes(&MOV_LOAD), is_load_complex_length_two.clone(), ); e.enc64_instp( sload32_complex, - recipe.opcodes(vec![0x63]).rex().w(), + recipe.opcodes(&MOVSXD).rex().w(), is_load_complex_length_two.clone(), ); e.enc_i32_i64_instp( uload16_complex, - recipe.opcodes(vec![0x0f, 0xb7]), + recipe.opcodes(&MOVZX_WORD), is_load_complex_length_two.clone(), ); e.enc_i32_i64_instp( sload16_complex, - recipe.opcodes(vec![0x0f, 0xbf]), + recipe.opcodes(&MOVSX_WORD), is_load_complex_length_two.clone(), ); e.enc_i32_i64_instp( uload8_complex, - recipe.opcodes(vec![0x0f, 0xb6]), + recipe.opcodes(&MOVZX_BYTE), is_load_complex_length_two.clone(), ); e.enc_i32_i64_instp( sload8_complex, - recipe.opcodes(vec![0x0f, 0xbe]), + recipe.opcodes(&MOVSX_BYTE), is_load_complex_length_two.clone(), ); } @@ -979,22 +927,22 @@ pub(crate) fn define( for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] { e.enc_i32_i64_instp( store_complex, - recipe.opcodes(vec![0x89]), + recipe.opcodes(&MOV_STORE), is_store_complex_length_three.clone(), ); e.enc_x86_64_instp( istore32_complex, - recipe.opcodes(vec![0x89]), + recipe.opcodes(&MOV_STORE), is_store_complex_length_three.clone(), ); e.enc_both_instp( istore16_complex.bind(I32), - recipe.opcodes(vec![0x66, 0x89]), + recipe.opcodes(&MOV_STORE_16), is_store_complex_length_three.clone(), ); e.enc_x86_64_instp( istore16_complex.bind(I64), - recipe.opcodes(vec![0x66, 0x89]), + recipe.opcodes(&MOV_STORE_16), is_store_complex_length_three.clone(), ); } @@ -1006,20 +954,20 @@ pub(crate) fn define( ] { e.enc_both_instp( istore8_complex.bind(I32), - recipe.opcodes(vec![0x88]), + recipe.opcodes(&MOV_BYTE_STORE), is_store_complex_length_three.clone(), ); e.enc_x86_64_instp( istore8_complex.bind(I64), - recipe.opcodes(vec![0x88]), + recipe.opcodes(&MOV_BYTE_STORE), is_store_complex_length_three.clone(), ); } for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] { - e.enc_i32_i64_ld_st(store, true, recipe.opcodes(vec![0x89])); - e.enc_x86_64(istore32.bind(I64).bind_any(), recipe.opcodes(vec![0x89])); - e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(vec![0x66, 0x89])); + e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); + e.enc_x86_64(istore32.bind(I64).bind_any(), recipe.opcodes(&MOV_STORE)); + e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16)); } // Byte stores are more complicated because the registers they can address @@ -1027,40 +975,46 @@ pub(crate) fn define( // the corresponding st* recipes when a REX prefix is applied. 
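The `_abcd` recipes in the next hunk exist because of an x86 quirk worth spelling out: without a REX prefix, byte-operand register codes 4 to 7 mean AH/CH/DH/BH rather than the low bytes of rSP/rBP/rSI/rDI, so only the A/C/D/B registers are safely addressable as byte registers. A tiny model:

```rust
// Byte-register meaning of ModR/M codes 0..=7 when no REX prefix is
// present; with a REX prefix, codes 4..=7 become SPL/BPL/SIL/DIL instead.
fn byte_reg_no_rex(code: u8) -> &'static str {
    ["al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"][(code & 7) as usize]
}

fn main() {
    assert_eq!(byte_reg_no_rex(4), "ah"); // not "spl" unless REX is emitted
}
```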
for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] { - e.enc_both(istore8.bind(I32).bind_any(), recipe.opcodes(vec![0x88])); - e.enc_x86_64(istore8.bind(I64).bind_any(), recipe.opcodes(vec![0x88])); + e.enc_both( + istore8.bind(I32).bind_any(), + recipe.opcodes(&MOV_BYTE_STORE), + ); + e.enc_x86_64( + istore8.bind(I64).bind_any(), + recipe.opcodes(&MOV_BYTE_STORE), + ); } - e.enc_i32_i64(spill, rec_spillSib32.opcodes(vec![0x89])); - e.enc_i32_i64(regspill, rec_regspill32.opcodes(vec![0x89])); - e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(vec![0x89])); - e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(vec![0x89])); + e.enc_i32_i64(spill, rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_i32_i64(regspill, rec_regspill32.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE)); // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid // constraining the permitted registers. // See MIN_SPILL_SLOT_SIZE which makes this safe. - e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(vec![0x89])); - e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(vec![0x89])); + e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE)); for &ty in &[I8, I16] { - e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(vec![0x89])); - e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(vec![0x89])); + e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE)); } for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { - e.enc_i32_i64_ld_st(load, true, recipe.opcodes(vec![0x8b])); - e.enc_x86_64(uload32.bind(I64), recipe.opcodes(vec![0x8b])); - e.enc64(sload32.bind(I64), recipe.opcodes(vec![0x63]).rex().w()); - e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(vec![0x0f, 0xb7])); - e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(vec![0x0f, 0xbf])); - e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(vec![0x0f, 0xb6])); - e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(vec![0x0f, 0xbe])); + e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); + e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD)); + e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w()); + e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD)); + e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD)); + e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE)); + e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE)); } - e.enc_i32_i64(fill, rec_fillSib32.opcodes(vec![0x8b])); - e.enc_i32_i64(regfill, rec_regfill32.opcodes(vec![0x8b])); - e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(vec![0x8b])); - e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(vec![0x8b])); + e.enc_i32_i64(fill, rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_i32_i64(regfill, rec_regfill32.opcodes(&MOV_LOAD)); + e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD)); // No-op fills, created by late-stage redundant-fill removal. for &ty in &[I64, I32, I16, I8] { @@ -1076,44 +1030,44 @@ pub(crate) fn define( // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. 
- e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(vec![0x8b])); - e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(vec![0x8b])); + e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD)); for &ty in &[I8, I16] { - e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(vec![0x8b])); - e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(vec![0x8b])); + e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD)); } // Push and Pop. - e.enc32(x86_push.bind(I32), rec_pushq.opcodes(vec![0x50])); - e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(vec![0x50])); + e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG)); + e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG)); - e.enc32(x86_pop.bind(I32), rec_popq.opcodes(vec![0x58])); - e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(vec![0x58])); + e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG)); + e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG)); // Copy Special // For x86-64, only define REX forms for now, since we can't describe the // special regunit immediate operands with the current constraint language. - e.enc64(copy_special, rec_copysp.opcodes(vec![0x89]).rex().w()); - e.enc32(copy_special, rec_copysp.opcodes(vec![0x89])); + e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w()); + e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE)); // Copy to SSA. These have to be done with special _rex_only encoders, because the standard // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account // the source register, which is specified directly in the instruction. - e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89])); - e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89])); - e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(vec![0x89])); - e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(vec![0x89])); + e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); e.enc_both_rex_only( copy_to_ssa.bind(I16), - rec_umr_reg_to_ssa.opcodes(vec![0x89]), + rec_umr_reg_to_ssa.opcodes(&MOV_STORE), ); e.enc_both_rex_only( copy_to_ssa.bind(F64), - rec_furm_reg_to_ssa.opcodes(vec![0xf2, 0x0f, 0x10]), + rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD), ); e.enc_both_rex_only( copy_to_ssa.bind(F32), - rec_furm_reg_to_ssa.opcodes(vec![0xf3, 0x0f, 0x10]), + rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD), ); // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn @@ -1129,204 +1083,159 @@ pub(crate) fn define( } // Adjust SP down by a dynamic value (or up, with a negative operand). - e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(vec![0x29])); + e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB)); e.enc64( adjust_sp_down.bind(I64), - rec_adjustsp.opcodes(vec![0x29]).rex().w(), + rec_adjustsp.opcodes(&SUB).rex().w(), ); // Adjust SP up by an immediate (or down, with a negative immediate). 
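The `&CMP_IMM8`/`&CMP_IMM` constants in the stack-adjustment hunks just below look odd for an add or subtract. The explanation: 0x83 and 0x81 are x86 "group 1" opcodes, the `.rrr(..)` value selects the actual operation, and the patch reuses one constant per byte value regardless of which group member a call site means.

```rust
// Group-1 operations selected by the ModR/M reg field for opcode bytes
// 0x80/0x81/0x83, per the x86 manual.
const GROUP1: [&str; 8] = ["add", "or", "adc", "sbb", "and", "sub", "xor", "cmp"];

fn main() {
    assert_eq!(GROUP1[0], "add"); // adjust_sp_up_imm: 0x83 with rrr = 0
    assert_eq!(GROUP1[5], "sub"); // adjust_sp_down_imm: 0x83 with rrr(5)
    assert_eq!(GROUP1[7], "cmp"); // icmp_imm later on: 0x83 with rrr(7)
}
```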
- e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(vec![0x83])); - e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(vec![0x81])); + e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8)); + e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM)); e.enc64( adjust_sp_up_imm, - rec_adjustsp_ib.opcodes(vec![0x83]).rex().w(), + rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(), ); e.enc64( adjust_sp_up_imm, - rec_adjustsp_id.opcodes(vec![0x81]).rex().w(), + rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(), ); // Adjust SP down by an immediate (or up, with a negative immediate). e.enc32( adjust_sp_down_imm, - rec_adjustsp_ib.opcodes(vec![0x83]).rrr(5), + rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5), ); - e.enc32( + e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5)); + e.enc64( adjust_sp_down_imm, - rec_adjustsp_id.opcodes(vec![0x81]).rrr(5), + rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(), ); e.enc64( adjust_sp_down_imm, - rec_adjustsp_ib.opcodes(vec![0x83]).rrr(5).rex().w(), - ); - e.enc64( - adjust_sp_down_imm, - rec_adjustsp_id.opcodes(vec![0x81]).rrr(5).rex().w(), + rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(), ); // Float loads and stores. + e.enc_both(load.bind(F32).bind_any(), rec_fld.opcodes(&MOVSS_LOAD)); + e.enc_both(load.bind(F32).bind_any(), rec_fldDisp8.opcodes(&MOVSS_LOAD)); e.enc_both( load.bind(F32).bind_any(), - rec_fld.opcodes(vec![0xf3, 0x0f, 0x10]), - ); - e.enc_both( - load.bind(F32).bind_any(), - rec_fldDisp8.opcodes(vec![0xf3, 0x0f, 0x10]), - ); - e.enc_both( - load.bind(F32).bind_any(), - rec_fldDisp32.opcodes(vec![0xf3, 0x0f, 0x10]), + rec_fldDisp32.opcodes(&MOVSS_LOAD), ); e.enc_both( load_complex.bind(F32), - rec_fldWithIndex.opcodes(vec![0xf3, 0x0f, 0x10]), + rec_fldWithIndex.opcodes(&MOVSS_LOAD), ); e.enc_both( load_complex.bind(F32), - rec_fldWithIndexDisp8.opcodes(vec![0xf3, 0x0f, 0x10]), + rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD), ); e.enc_both( load_complex.bind(F32), - rec_fldWithIndexDisp32.opcodes(vec![0xf3, 0x0f, 0x10]), + rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD), ); + e.enc_both(load.bind(F64).bind_any(), rec_fld.opcodes(&MOVSD_LOAD)); + e.enc_both(load.bind(F64).bind_any(), rec_fldDisp8.opcodes(&MOVSD_LOAD)); e.enc_both( load.bind(F64).bind_any(), - rec_fld.opcodes(vec![0xf2, 0x0f, 0x10]), - ); - e.enc_both( - load.bind(F64).bind_any(), - rec_fldDisp8.opcodes(vec![0xf2, 0x0f, 0x10]), - ); - e.enc_both( - load.bind(F64).bind_any(), - rec_fldDisp32.opcodes(vec![0xf2, 0x0f, 0x10]), + rec_fldDisp32.opcodes(&MOVSD_LOAD), ); e.enc_both( load_complex.bind(F64), - rec_fldWithIndex.opcodes(vec![0xf2, 0x0f, 0x10]), + rec_fldWithIndex.opcodes(&MOVSD_LOAD), ); e.enc_both( load_complex.bind(F64), - rec_fldWithIndexDisp8.opcodes(vec![0xf2, 0x0f, 0x10]), + rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD), ); e.enc_both( load_complex.bind(F64), - rec_fldWithIndexDisp32.opcodes(vec![0xf2, 0x0f, 0x10]), + rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD), ); + e.enc_both(store.bind(F32).bind_any(), rec_fst.opcodes(&MOVSS_STORE)); e.enc_both( store.bind(F32).bind_any(), - rec_fst.opcodes(vec![0xf3, 0x0f, 0x11]), + rec_fstDisp8.opcodes(&MOVSS_STORE), ); e.enc_both( store.bind(F32).bind_any(), - rec_fstDisp8.opcodes(vec![0xf3, 0x0f, 0x11]), - ); - e.enc_both( - store.bind(F32).bind_any(), - rec_fstDisp32.opcodes(vec![0xf3, 0x0f, 0x11]), + rec_fstDisp32.opcodes(&MOVSS_STORE), ); e.enc_both( store_complex.bind(F32), - rec_fstWithIndex.opcodes(vec![0xf3, 0x0f, 0x11]), + rec_fstWithIndex.opcodes(&MOVSS_STORE), ); e.enc_both( store_complex.bind(F32), - 
rec_fstWithIndexDisp8.opcodes(vec![0xf3, 0x0f, 0x11]), + rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE), ); e.enc_both( store_complex.bind(F32), - rec_fstWithIndexDisp32.opcodes(vec![0xf3, 0x0f, 0x11]), + rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE), ); + e.enc_both(store.bind(F64).bind_any(), rec_fst.opcodes(&MOVSD_STORE)); e.enc_both( store.bind(F64).bind_any(), - rec_fst.opcodes(vec![0xf2, 0x0f, 0x11]), + rec_fstDisp8.opcodes(&MOVSD_STORE), ); e.enc_both( store.bind(F64).bind_any(), - rec_fstDisp8.opcodes(vec![0xf2, 0x0f, 0x11]), - ); - e.enc_both( - store.bind(F64).bind_any(), - rec_fstDisp32.opcodes(vec![0xf2, 0x0f, 0x11]), + rec_fstDisp32.opcodes(&MOVSD_STORE), ); e.enc_both( store_complex.bind(F64), - rec_fstWithIndex.opcodes(vec![0xf2, 0x0f, 0x11]), + rec_fstWithIndex.opcodes(&MOVSD_STORE), ); e.enc_both( store_complex.bind(F64), - rec_fstWithIndexDisp8.opcodes(vec![0xf2, 0x0f, 0x11]), + rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE), ); e.enc_both( store_complex.bind(F64), - rec_fstWithIndexDisp32.opcodes(vec![0xf2, 0x0f, 0x11]), + rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE), ); - e.enc_both( - fill.bind(F32), - rec_ffillSib32.opcodes(vec![0xf3, 0x0f, 0x10]), - ); - e.enc_both( - regfill.bind(F32), - rec_fregfill32.opcodes(vec![0xf3, 0x0f, 0x10]), - ); - e.enc_both( - fill.bind(F64), - rec_ffillSib32.opcodes(vec![0xf2, 0x0f, 0x10]), - ); - e.enc_both( - regfill.bind(F64), - rec_fregfill32.opcodes(vec![0xf2, 0x0f, 0x10]), - ); + e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD)); + e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD)); + e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD)); + e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD)); - e.enc_both( - spill.bind(F32), - rec_fspillSib32.opcodes(vec![0xf3, 0x0f, 0x11]), - ); - e.enc_both( - regspill.bind(F32), - rec_fregspill32.opcodes(vec![0xf3, 0x0f, 0x11]), - ); - e.enc_both( - spill.bind(F64), - rec_fspillSib32.opcodes(vec![0xf2, 0x0f, 0x11]), - ); - e.enc_both( - regspill.bind(F64), - rec_fregspill32.opcodes(vec![0xf2, 0x0f, 0x11]), - ); + e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE)); + e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE)); + e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE)); + e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE)); // Function addresses. // Non-PIC, all-ones funcaddresses. e.enc32_isap( func_addr.bind(I32), - rec_fnaddr4.opcodes(vec![0xb8]), + rec_fnaddr4.opcodes(&MOV_IMM), not_all_ones_funcaddrs_and_not_is_pic, ); e.enc64_isap( func_addr.bind(I64), - rec_fnaddr8.opcodes(vec![0xb8]).rex().w(), + rec_fnaddr8.opcodes(&MOV_IMM).rex().w(), not_all_ones_funcaddrs_and_not_is_pic, ); // Non-PIC, all-zeros funcaddresses. e.enc32_isap( func_addr.bind(I32), - rec_allones_fnaddr4.opcodes(vec![0xb8]), + rec_allones_fnaddr4.opcodes(&MOV_IMM), all_ones_funcaddrs_and_not_is_pic, ); e.enc64_isap( func_addr.bind(I64), - rec_allones_fnaddr8.opcodes(vec![0xb8]).rex().w(), + rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(), all_ones_funcaddrs_and_not_is_pic, ); @@ -1335,14 +1244,14 @@ pub(crate) fn define( let is_colocated_func = InstructionPredicate::new_is_colocated_func(f_func_addr, "func_ref"); e.enc64_instp( func_addr.bind(I64), - rec_pcrel_fnaddr8.opcodes(vec![0x8d]).rex().w(), + rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(), is_colocated_func, ); // 64-bit, non-colocated, PIC. 
e.enc64_isap( func_addr.bind(I64), - rec_got_fnaddr8.opcodes(vec![0x8b]).rex().w(), + rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(), is_pic, ); @@ -1351,19 +1260,19 @@ pub(crate) fn define( // Non-PIC. e.enc32_isap( symbol_value.bind(I32), - rec_gvaddr4.opcodes(vec![0xb8]), + rec_gvaddr4.opcodes(&MOV_IMM), not_is_pic, ); e.enc64_isap( symbol_value.bind(I64), - rec_gvaddr8.opcodes(vec![0xb8]).rex().w(), + rec_gvaddr8.opcodes(&MOV_IMM).rex().w(), not_is_pic, ); // PIC, colocated. e.enc64_func( symbol_value.bind(I64), - rec_pcrel_gvaddr8.opcodes(vec![0x8d]).rex().w(), + rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(), |encoding| { encoding .isa_predicate(is_pic) @@ -1374,7 +1283,7 @@ pub(crate) fn define( // PIC, non-colocated. e.enc64_isap( symbol_value.bind(I64), - rec_got_gvaddr8.opcodes(vec![0x8b]).rex().w(), + rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(), is_pic, ); @@ -1382,102 +1291,102 @@ pub(crate) fn define( // // TODO: Add encoding rules for stack_load and stack_store, so that they // don't get legalized to stack_addr + load/store. - e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(vec![0x8d])); - e.enc64( - stack_addr.bind(I64), - rec_spaddr8_id.opcodes(vec![0x8d]).rex().w(), - ); + e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA)); + e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w()); // Call/return // 32-bit, both PIC and non-PIC. - e.enc32(call, rec_call_id.opcodes(vec![0xe8])); + e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE)); // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. let f_call = formats.get(formats.by_name("Call")); let is_colocated_func = InstructionPredicate::new_is_colocated_func(f_call, "func_ref"); - e.enc64_instp(call, rec_call_id.opcodes(vec![0xe8]), is_colocated_func); + e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func); // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC // is currently using the large model, which requires calls be lowered to // func_addr+call_indirect. - e.enc64_isap(call, rec_call_plt_id.opcodes(vec![0xe8]), is_pic); + e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic); e.enc32( call_indirect.bind(I32), - rec_call_r.opcodes(vec![0xff]).rrr(2), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), ); e.enc64( call_indirect.bind(I64), - rec_call_r.opcodes(vec![0xff]).rrr(2).rex(), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(), ); e.enc64( call_indirect.bind(I64), - rec_call_r.opcodes(vec![0xff]).rrr(2), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), ); - e.enc32(return_, rec_ret.opcodes(vec![0xc3])); - e.enc64(return_, rec_ret.opcodes(vec![0xc3])); + e.enc32(return_, rec_ret.opcodes(&RET_NEAR)); + e.enc64(return_, rec_ret.opcodes(&RET_NEAR)); // Branches. - e.enc32(jump, rec_jmpb.opcodes(vec![0xeb])); - e.enc64(jump, rec_jmpb.opcodes(vec![0xeb])); - e.enc32(jump, rec_jmpd.opcodes(vec![0xe9])); - e.enc64(jump, rec_jmpd.opcodes(vec![0xe9])); + e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT)); + e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT)); + e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); + e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); - e.enc_both(brif, rec_brib.opcodes(vec![0x70])); - e.enc_both(brif, rec_brid.opcodes(vec![0x0f, 0x80])); + e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW)); + e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW)); // Not all float condition codes are legal, see `supported_floatccs`. 
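In the conditional-branch encodings here, one `JUMP_SHORT_IF_OVERFLOW` / `JUMP_NEAR_IF_OVERFLOW` constant stands in for the entire Jcc family: overflow is condition code 0, and the branch recipes appear to add the IR condition code into the final opcode byte. The arithmetic, for the short form:

```rust
pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70]; // Jcc rel8, cc = 0

fn jcc_short(cc: u8) -> u8 {
    JUMP_SHORT_IF_OVERFLOW[0] + cc // 0x70..=0x7f covers all 16 conditions
}

fn main() {
    assert_eq!(jcc_short(0x4), 0x74); // je  rel8, used by brz below
    assert_eq!(jcc_short(0x5), 0x75); // jne rel8, used by brnz below
}
```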
- e.enc_both(brff, rec_brfb.opcodes(vec![0x70])); - e.enc_both(brff, rec_brfd.opcodes(vec![0x0f, 0x80])); + e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW)); + e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW)); // Note that the tjccd opcode will be prefixed with 0x0f. - e.enc_i32_i64(brz, rec_tjccb.opcodes(vec![0x74])); - e.enc_i32_i64(brz, rec_tjccd.opcodes(vec![0x84])); - e.enc_i32_i64(brnz, rec_tjccb.opcodes(vec![0x75])); - e.enc_i32_i64(brnz, rec_tjccd.opcodes(vec![0x85])); + e.enc_i32_i64(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL)); + e.enc_i32_i64(brz, rec_tjccd.opcodes(&TEST_BYTE_REG)); + e.enc_i32_i64(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL)); + e.enc_i32_i64(brnz, rec_tjccd.opcodes(&TEST_REG)); // Branch on a b1 value in a register only looks at the low 8 bits. See also // bint encodings below. // // Start with the worst-case encoding for X86_32 only. The register allocator // can't handle a branch with an ABCD-constrained operand. - e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(vec![0x84])); - e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(vec![0x85])); + e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG)); + e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG)); - e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(vec![0x74])); - e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(vec![0x84])); - e.enc_both(brnz.bind(B1), rec_t8jccb_abcd.opcodes(vec![0x75])); - e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(vec![0x85])); + e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL)); + e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG)); + e.enc_both( + brnz.bind(B1), + rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL), + ); + e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG)); // Jump tables. 
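Before the jump tables, one easy misreading in the branch hunks above: `brz`/`brnz` with the `tjccd` and `t8jccd*` recipes take `&TEST_BYTE_REG` ([0x84]) and `&TEST_REG` ([0x85]), which are not test instructions in this position. As the retained comment says, those recipes prefix the byte with 0x0f, and 0x0f 0x84 / 0x0f 0x85 are exactly `je`/`jne` rel32; the constant reuse leans on that numeric coincidence.

```rust
pub static TEST_BYTE_REG: [u8; 1] = [0x84]; // alone: test r/m8, r8
pub static TEST_REG: [u8; 1] = [0x85]; // alone: test r/m{16,32,64}, r

fn main() {
    // With the recipe's 0x0f escape byte prepended, the same byte values
    // become the two-byte near conditional-jump opcodes.
    assert_eq!([0x0f, TEST_BYTE_REG[0]], [0x0f, 0x84]); // je  rel32
    assert_eq!([0x0f, TEST_REG[0]], [0x0f, 0x85]); // jne rel32
}
```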
e.enc64( jump_table_entry.bind(I64), - rec_jt_entry.opcodes(vec![0x63]).rex().w(), + rec_jt_entry.opcodes(&MOVSXD).rex().w(), ); - e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(vec![0x8b])); + e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD)); e.enc64( jump_table_base.bind(I64), - rec_jt_base.opcodes(vec![0x8d]).rex().w(), + rec_jt_base.opcodes(&LEA).rex().w(), ); - e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(vec![0x8d])); + e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA)); e.enc_x86_64( indirect_jump_table_br.bind(I64), - rec_indirect_jmp.opcodes(vec![0xff]).rrr(4), + rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), ); e.enc32( indirect_jump_table_br.bind(I32), - rec_indirect_jmp.opcodes(vec![0xff]).rrr(4), + rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), ); // Trap as ud2 - e.enc32(trap, rec_trap.opcodes(vec![0x0f, 0x0b])); - e.enc64(trap, rec_trap.opcodes(vec![0x0f, 0x0b])); - e.enc32(resumable_trap, rec_trap.opcodes(vec![0x0f, 0x0b])); - e.enc64(resumable_trap, rec_trap.opcodes(vec![0x0f, 0x0b])); + e.enc32(trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc64(trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); // Debug trap as int3 e.enc32_rec(debugtrap, rec_debugtrap, 0); @@ -1489,31 +1398,28 @@ pub(crate) fn define( e.enc64_rec(trapff, rec_trapff, 0); // Comparisons - e.enc_i32_i64(icmp, rec_icscc.opcodes(vec![0x39])); - e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(vec![0x83]).rrr(7)); - e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(vec![0x81]).rrr(7)); - e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(vec![0x39])); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(vec![0x83]).rrr(7)); - e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(vec![0x81]).rrr(7)); + e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG)); + e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7)); + e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7)); + e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG)); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7)); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7)); // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). - e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(vec![0x39])); - e.enc64( - ifcmp_sp.bind(I64), - rec_rcmp_sp.opcodes(vec![0x39]).rex().w(), - ); + e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG)); + e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w()); // Convert flags to bool. // This encodes `b1` as an 8-bit low register with the value 0 or 1. - e.enc_both(trueif, rec_seti_abcd.opcodes(vec![0x0f, 0x90])); - e.enc_both(trueff, rec_setf_abcd.opcodes(vec![0x0f, 0x90])); + e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); + e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); // Conditional move (a.k.a integer select). - e.enc_i32_i64(selectif, rec_cmov.opcodes(vec![0x0f, 0x40])); + e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW)); // Bit scan forwards and reverse - e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(vec![0x0f, 0xbc])); - e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(vec![0x0f, 0xbd])); + e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD)); + e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE)); // Convert bool to int. // @@ -1523,24 +1429,24 @@ pub(crate) fn define( // Encode movzbq as movzbl, because it's equivalent and shorter. 
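Why `movzbl` (and, further down, a plain 32-bit `mov` for `uextend.i64.i32`) can implement 64-bit zero extension: on x86-64, writing a 32-bit register destination implicitly clears bits 63:32, so the cheaper non-REX.W form already produces the full 64-bit result. Modeled directly:

```rust
// x86-64 rule: any instruction with a 32-bit register destination zeroes
// the upper half of the containing 64-bit register.
fn write_32bit_dest(dst64: &mut u64, value32: u32) {
    *dst64 = value32 as u64; // upper 32 bits become zero, unconditionally
}

fn main() {
    let mut rax: u64 = 0xdead_beef_0000_0000;
    write_32bit_dest(&mut rax, 0xff); // e.g. movzbl of a 0xff byte
    assert_eq!(rax, 0xff);
}
```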
e.enc32( bint.bind(I32).bind(B1), - rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), ); e.enc64( bint.bind(I64).bind(B1), - rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), ); e.enc64( bint.bind(I64).bind(B1), - rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), ); e.enc64( bint.bind(I32).bind(B1), - rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), ); e.enc64( bint.bind(I32).bind(B1), - rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), ); // Numerical conversions. @@ -1563,103 +1469,103 @@ pub(crate) fn define( // movsbl e.enc32( sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xbe]), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), ); e.enc64( sextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(vec![0x0f, 0xbe]).rex(), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(), ); e.enc64( sextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xbe]), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), ); // movswl e.enc32( sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(vec![0x0f, 0xbf]), + rec_urm_noflags.opcodes(&MOVSX_WORD), ); e.enc64( sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(vec![0x0f, 0xbf]).rex(), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex(), ); e.enc64( sextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(vec![0x0f, 0xbf]), + rec_urm_noflags.opcodes(&MOVSX_WORD), ); // movsbq e.enc64( sextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(vec![0x0f, 0xbe]).rex().w(), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(), ); // movswq e.enc64( sextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(vec![0x0f, 0xbf]).rex().w(), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(), ); // movslq e.enc64( sextend.bind(I64).bind(I32), - rec_urm_noflags.opcodes(vec![0x63]).rex().w(), + rec_urm_noflags.opcodes(&MOVSXD).rex().w(), ); // movzbl e.enc32( uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), ); e.enc64( uextend.bind(I32).bind(I8), - rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), ); e.enc64( uextend.bind(I32).bind(I8), - rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), ); // movzwl e.enc32( uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + rec_urm_noflags.opcodes(&MOVZX_WORD), ); e.enc64( uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(vec![0x0f, 0xb7]).rex(), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), ); e.enc64( uextend.bind(I32).bind(I16), - rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + rec_urm_noflags.opcodes(&MOVZX_WORD), ); // movzbq, encoded as movzbl because it's equivalent and shorter. e.enc64( uextend.bind(I64).bind(I8), - rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), ); e.enc64( uextend.bind(I64).bind(I8), - rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), ); // movzwq, encoded as movzwl because it's equivalent and shorter e.enc64( uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(vec![0x0f, 0xb7]).rex(), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), ); e.enc64( uextend.bind(I64).bind(I16), - rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + rec_urm_noflags.opcodes(&MOVZX_WORD), ); // A 32-bit register copy clears the high 32 bits. 
e.enc64( uextend.bind(I64).bind(I32), - rec_umr.opcodes(vec![0x89]).rex(), + rec_umr.opcodes(&MOV_STORE).rex(), ); - e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(vec![0x89])); + e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE)); // Floating point @@ -1669,7 +1575,7 @@ pub(crate) fn define( let is_zero_32_bit_float = InstructionPredicate::new_is_zero_32bit_float(f_unary_ieee32, "imm"); e.enc32_instp( f32const, - rec_f32imm_z.opcodes(vec![0x0f, 0x57]), + rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float.clone(), ); @@ -1677,148 +1583,133 @@ pub(crate) fn define( let is_zero_64_bit_float = InstructionPredicate::new_is_zero_64bit_float(f_unary_ieee64, "imm"); e.enc32_instp( f64const, - rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]), + rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float.clone(), ); - e.enc_x86_64_instp( - f32const, - rec_f32imm_z.opcodes(vec![0x0f, 0x57]), - is_zero_32_bit_float, - ); - e.enc_x86_64_instp( - f64const, - rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]), // XORPD from SSE2 - is_zero_64_bit_float, - ); + e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float); + e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float); // movd e.enc_both( bitcast.bind(F32).bind(I32), - rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]), + rec_frurm.opcodes(&MOVD_LOAD_XMM), ); e.enc_both( bitcast.bind(I32).bind(F32), - rec_rfumr.opcodes(vec![0x66, 0x0f, 0x7e]), + rec_rfumr.opcodes(&MOVD_STORE_XMM), ); // movq e.enc64( bitcast.bind(F64).bind(I64), - rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]).rex().w(), + rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), ); e.enc64( bitcast.bind(I64).bind(F64), - rec_rfumr.opcodes(vec![0x66, 0x0f, 0x7e]).rex().w(), + rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(), ); // movaps - e.enc_both(copy.bind(F32), rec_furm.opcodes(vec![0x0f, 0x28])); - e.enc_both(copy.bind(F64), rec_furm.opcodes(vec![0x0f, 0x28])); + e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD)); + e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD)); // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit // immediate operands with the current constraint language. - e.enc32(regmove.bind(F32), rec_frmov.opcodes(vec![0x0f, 0x28])); - e.enc64(regmove.bind(F32), rec_frmov.opcodes(vec![0x0f, 0x28]).rex()); + e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit // immediate operands with the current constraint language. 
- e.enc32(regmove.bind(F64), rec_frmov.opcodes(vec![0x0f, 0x28])); - e.enc64(regmove.bind(F64), rec_frmov.opcodes(vec![0x0f, 0x28]).rex()); + e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); // cvtsi2ss - e.enc_i32_i64( - fcvt_from_sint.bind(F32), - rec_frurm.opcodes(vec![0xf3, 0x0f, 0x2a]), - ); + e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS)); // cvtsi2sd - e.enc_i32_i64( - fcvt_from_sint.bind(F64), - rec_frurm.opcodes(vec![0xf2, 0x0f, 0x2a]), - ); + e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD)); // cvtss2sd - e.enc_both( - fpromote.bind(F64).bind(F32), - rec_furm.opcodes(vec![0xf3, 0x0f, 0x5a]), - ); + e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD)); // cvtsd2ss - e.enc_both( - fdemote.bind(F32).bind(F64), - rec_furm.opcodes(vec![0xf2, 0x0f, 0x5a]), - ); + e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS)); // cvttss2si e.enc_both( x86_cvtt2si.bind(I32).bind(F32), - rec_rfurm.opcodes(vec![0xf3, 0x0f, 0x2c]), + rec_rfurm.opcodes(&CVTTSS2SI), ); e.enc64( x86_cvtt2si.bind(I64).bind(F32), - rec_rfurm.opcodes(vec![0xf3, 0x0f, 0x2c]).rex().w(), + rec_rfurm.opcodes(&CVTTSS2SI).rex().w(), ); // cvttsd2si e.enc_both( x86_cvtt2si.bind(I32).bind(F64), - rec_rfurm.opcodes(vec![0xf2, 0x0f, 0x2c]), + rec_rfurm.opcodes(&CVTTSD2SI), ); e.enc64( x86_cvtt2si.bind(I64).bind(F64), - rec_rfurm.opcodes(vec![0xf2, 0x0f, 0x2c]).rex().w(), + rec_rfurm.opcodes(&CVTTSD2SI).rex().w(), ); // Exact square roots. - e.enc_both(sqrt.bind(F32), rec_furm.opcodes(vec![0xf3, 0x0f, 0x51])); - e.enc_both(sqrt.bind(F64), rec_furm.opcodes(vec![0xf2, 0x0f, 0x51])); + e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS)); + e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD)); // Rounding. The recipe looks at the opcode to pick an immediate. for inst in &[nearest, floor, ceil, trunc] { - e.enc_both_isap( - inst.bind(F32), - rec_furmi_rnd.opcodes(vec![0x66, 0x0f, 0x3a, 0x0a]), - use_sse41, - ); - e.enc_both_isap( - inst.bind(F64), - rec_furmi_rnd.opcodes(vec![0x66, 0x0f, 0x3a, 0x0b]), - use_sse41, - ); + e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41); + e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41); } // Binary arithmetic ops. - for &(inst, opc) in &[ - (fadd, 0x58), - (fsub, 0x5c), - (fmul, 0x59), - (fdiv, 0x5e), - (x86_fmin, 0x5d), - (x86_fmax, 0x5f), - ] { - e.enc_both(inst.bind(F32), rec_fa.opcodes(vec![0xf3, 0x0f, opc])); - e.enc_both(inst.bind(F64), rec_fa.opcodes(vec![0xf2, 0x0f, opc])); - } + e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS)); + e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD)); + + e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS)); + e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD)); + + e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS)); + e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD)); + + e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS)); + e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD)); + + e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS)); + e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD)); + + e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS)); + e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD)); // Binary bitwise ops. 
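The comment added in the next hunk claims the single-precision opcodes are one byte shorter for the F64 bitwise ops. Concretely, the packed-double forms carry a 0x66 prefix that pure bit manipulation does not need; ANDPD's bytes are shown for comparison only and are not one of the patch's constants.

```rust
pub static ANDPS: [u8; 2] = [0x0f, 0x54]; // from the patch
static ANDPD: [u8; 3] = [0x66, 0x0f, 0x54]; // comparison only, not in patch

fn main() {
    // Same 128-bit bitwise result, one encoded byte cheaper.
    assert_eq!(ANDPS.len() + 1, ANDPD.len());
    assert_eq!(ANDPS[..], ANDPD[1..]);
}
```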
-    for &(inst, opc) in &[(band, 0x54), (bor, 0x56), (bxor, 0x57)] {
-        e.enc_both(inst.bind(F32), rec_fa.opcodes(vec![0x0f, opc]));
-        e.enc_both(inst.bind(F64), rec_fa.opcodes(vec![0x0f, opc]));
-    }
+    //
+    // The F64 version is intentionally encoded using the single-precision opcode:
+    // the operation is identical and the encoding is one byte shorter.
+    e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS));
+    e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS));
+
+    e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS));
+    e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS));
+
+    e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS));
+    e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS));

     // The `andnps(x,y)` instruction computes `~x&y`, while `band_not(x,y)` is `x&~y`.
-    e.enc_both(band_not.bind(F32), rec_fax.opcodes(vec![0x0f, 0x55]));
-    e.enc_both(band_not.bind(F64), rec_fax.opcodes(vec![0x0f, 0x55]));
+    e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS));
+    e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS));

     // Comparisons.
     //
     // This only covers the condition codes in `supported_floatccs`, the rest are
     // handled by legalization patterns.
-    e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(vec![0x0f, 0x2e]));
-    e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(vec![0x66, 0x0f, 0x2e]));
-    e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(vec![0x0f, 0x2e]));
-    e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(vec![0x66, 0x0f, 0x2e]));
+    e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS));
+    e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD));
+    e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS));
+    e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD));

     // SIMD vector size: eventually multiple vector sizes may be supported but for now only
     // SSE-sized vectors are available.
@@ -1833,7 +1724,7 @@ pub(crate) fn define(
     // PSHUFB, 8-bit shuffle using two XMM registers.
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size);
-        let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 00]);
+        let template = rec_fa.nonrex().opcodes(&PSHUFB);
         e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd);
         e.enc64_isap(instruction, template, use_ssse3_simd);
     }
@@ -1841,9 +1732,7 @@ pub(crate) fn define(
     // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
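The PSHUFD encodings below pair the opcode with a u8 immediate, which packs four 2-bit source-lane selectors, one per destination lane. A small model of the selection semantics:

```rust
fn pshufd(src: [u32; 4], imm8: u8) -> [u32; 4] {
    // Destination lane i copies source lane (imm8 >> 2*i) & 0b11.
    let sel = |i: u8| src[((imm8 >> (2 * i)) & 0b11) as usize];
    [sel(0), sel(1), sel(2), sel(3)]
}

fn main() {
    // 0b00_01_10_11 = 0x1b reverses the four 32-bit lanes.
    assert_eq!(pshufd([1, 2, 3, 4], 0x1b), [4, 3, 2, 1]);
}
```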
     for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
         let instruction = x86_pshufd.bind_vector_from_lane(ty, sse_vector_size);
-        let template = rec_r_ib_unsigned_fpr
-            .nonrex()
-            .opcodes(vec![0x66, 0x0f, 0x70]);
+        let template = rec_r_ib_unsigned_fpr.nonrex().opcodes(&PSHUFD);
         e.enc32(instruction.clone(), template.clone());
         e.enc64(instruction, template);
     }
@@ -1856,7 +1745,7 @@ pub(crate) fn define(
         if ty.is_float() {
             e.enc_32_64_rec(instruction, rec_null_fpr, 0);
         } else {
-            let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
+            let template = rec_frurm.opcodes(&MOVD_LOAD_XMM);
             if ty.lane_bits() < 64 {
                 // no 32-bit encodings for 64-bit widths
                 e.enc32(instruction.clone(), template.clone());
@@ -1866,17 +1755,17 @@ pub(crate) fn define(
     }

     // SIMD insertlane
-    let mut x86_pinsr_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
+    let mut x86_pinsr_mapping: HashMap<u64, (&'static [u8], Option<SettingPredicateNumber>)> =
         HashMap::new();
-    x86_pinsr_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x20], Some(use_sse41_simd))); // PINSRB
-    x86_pinsr_mapping.insert(16, (vec![0x66, 0x0f, 0xc4], None)); // PINSRW from SSE2
-    x86_pinsr_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41_simd))); // PINSRD
-    x86_pinsr_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41_simd))); // PINSRQ, only x86_64
+    x86_pinsr_mapping.insert(8, (&PINSRB, Some(use_sse41_simd)));
+    x86_pinsr_mapping.insert(16, (&PINSRW, None));
+    x86_pinsr_mapping.insert(32, (&PINSR, Some(use_sse41_simd)));
+    x86_pinsr_mapping.insert(64, (&PINSR, Some(use_sse41_simd)));
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         if let Some((opcode, isap)) = x86_pinsr_mapping.get(&ty.lane_bits()) {
             let instruction = x86_pinsr.bind_vector_from_lane(ty, sse_vector_size);
-            let template = rec_r_ib_unsigned_r.opcodes(opcode.clone());
+            let template = rec_r_ib_unsigned_r.opcodes(opcode);
             if ty.lane_bits() < 64 {
                 e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
             } else {
@@ -1890,36 +1779,36 @@ pub(crate) fn define(
     // For legalizing insertlane with floats, INSERTPS from SSE4.1.
     {
         let instruction = x86_insertps.bind_vector_from_lane(F32, sse_vector_size);
-        let template = rec_fa_ib.nonrex().opcodes(vec![0x66, 0x0f, 0x3a, 0x21]);
+        let template = rec_fa_ib.nonrex().opcodes(&INSERTPS);
         e.enc_32_64_maybe_isap(instruction, template, Some(use_sse41_simd));
     }

     // For legalizing insertlane with floats, MOVSD from SSE2.
     {
         let instruction = x86_movsd.bind_vector_from_lane(F64, sse_vector_size);
-        let template = rec_fa.nonrex().opcodes(vec![0xf2, 0x0f, 0x10]);
+        let template = rec_fa.nonrex().opcodes(&MOVSD_LOAD);
         e.enc_32_64_maybe_isap(instruction, template, None); // from SSE2
     }

     // For legalizing insertlane with floats, MOVLHPS from SSE.
     {
         let instruction = x86_movlhps.bind_vector_from_lane(F64, sse_vector_size);
-        let template = rec_fa.nonrex().opcodes(vec![0x0f, 0x16]);
+        let template = rec_fa.nonrex().opcodes(&MOVLHPS);
         e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
     }

     // SIMD extractlane
-    let mut x86_pextr_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
+    let mut x86_pextr_mapping: HashMap<u64, (&'static [u8], Option<SettingPredicateNumber>)> =
         HashMap::new();
-    x86_pextr_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x14], Some(use_sse41_simd))); // PEXTRB
-    x86_pextr_mapping.insert(16, (vec![0x66, 0x0f, 0xc5], None)); // PEXTRW from SSE2, SSE4.1 has a PEXTRW that can move to reg/m16 but the opcode is four bytes
-    x86_pextr_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41_simd))); // PEXTRD
-    x86_pextr_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41_simd))); // PEXTRQ, only x86_64
+    x86_pextr_mapping.insert(8, (&PEXTRB, Some(use_sse41_simd)));
+    x86_pextr_mapping.insert(16, (&PEXTRW_SSE2, None));
+    x86_pextr_mapping.insert(32, (&PEXTR, Some(use_sse41_simd)));
+    x86_pextr_mapping.insert(64, (&PEXTR, Some(use_sse41_simd)));
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
         if let Some((opcode, isap)) = x86_pextr_mapping.get(&ty.lane_bits()) {
             let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
-            let template = rec_r_ib_unsigned_gpr.opcodes(opcode.clone());
+            let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
             if ty.lane_bits() < 64 {
                 e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
             } else {
@@ -1972,18 +1861,14 @@ pub(crate) fn define(
         let is_zero_128bit =
             InstructionPredicate::new_is_all_zeroes_128bit(f_unary_const, "constant_handle");
-        let template = rec_vconst_optimized
-            .nonrex()
-            .opcodes(vec![0x66, 0x0f, 0xef]); // PXOR from SSE2
+        let template = rec_vconst_optimized.nonrex().opcodes(&PXOR);
         e.enc_32_64_func(instruction.clone(), template, |builder| {
             builder.inst_predicate(is_zero_128bit)
         });
         let is_ones_128bit =
             InstructionPredicate::new_is_all_ones_128bit(f_unary_const, "constant_handle");
-        let template = rec_vconst_optimized
-            .nonrex()
-            .opcodes(vec![0x66, 0x0f, 0x74]); // PCMPEQB from SSE2
+        let template = rec_vconst_optimized.nonrex().opcodes(&PCMPEQB);
         e.enc_32_64_func(instruction, template, |builder| {
             builder.inst_predicate(is_ones_128bit)
         });
@@ -1997,14 +1882,14 @@ pub(crate) fn define(
     // in memory) but some performance measurements are needed.
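A note on the mapping tables in these hunks (`x86_pinsr_mapping`, `x86_pextr_mapping`, and `pcmpeq_mapping` below): statics of different lengths such as `[u8; 3]` and `[u8; 4]` can share one map only because the borrow unsizes to `&'static [u8]`. A sketch using the PEXTRW/PEXTR byte values from the patch; the vconst encodings continue after it:

```rust
use std::collections::HashMap;

pub static PEXTRW_SSE2: [u8; 3] = [0x66, 0x0f, 0xc5];
pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];

fn main() {
    let mut m: HashMap<u64, &'static [u8]> = HashMap::new();
    m.insert(16, &PEXTRW_SSE2); // &[u8; 3] coerces to &'static [u8]
    m.insert(64, &PEXTR); // &[u8; 4] likewise
    assert_eq!(m[&64].len(), 4);
}
```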
for ty in ValueType::all_lane_types().filter(allowed_simd_type) { let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size); - let template = rec_vconst.nonrex().opcodes(vec![0x0f, 0x10]); + let template = rec_vconst.nonrex().opcodes(&MOVUPS_LOAD); e.enc_32_64_maybe_isap(instruction, template, None); // from SSE } // SIMD bor using ORPS for ty in ValueType::all_lane_types().filter(allowed_simd_type) { let instruction = bor.bind_vector_from_lane(ty, sse_vector_size); - let template = rec_fa.nonrex().opcodes(vec![0x0f, 0x56]); + let template = rec_fa.nonrex().opcodes(&ORPS); e.enc_32_64_maybe_isap(instruction, template, None); // from SSE } @@ -2014,66 +1899,60 @@ pub(crate) fn define( for ty in ValueType::all_lane_types().filter(allowed_simd_type) { // Store let bound_store = store.bind_vector_from_lane(ty, sse_vector_size).bind_any(); - e.enc_32_64(bound_store.clone(), rec_fst.opcodes(vec![0x0f, 0x11])); - e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(vec![0x0f, 0x11])); - e.enc_32_64(bound_store, rec_fstDisp32.opcodes(vec![0x0f, 0x11])); + e.enc_32_64(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE)); + e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE)); + e.enc_32_64(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE)); // Load let bound_load = load.bind_vector_from_lane(ty, sse_vector_size).bind_any(); - e.enc_32_64(bound_load.clone(), rec_fld.opcodes(vec![0x0f, 0x10])); - e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(vec![0x0f, 0x10])); - e.enc_32_64(bound_load, rec_fldDisp32.opcodes(vec![0x0f, 0x10])); + e.enc_32_64(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD)); + e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); + e.enc_32_64(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); // Spill let bound_spill = spill.bind_vector_from_lane(ty, sse_vector_size); - e.enc_32_64(bound_spill, rec_fspillSib32.opcodes(vec![0x0f, 0x11])); + e.enc_32_64(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE)); let bound_regspill = regspill.bind_vector_from_lane(ty, sse_vector_size); - e.enc_32_64(bound_regspill, rec_fregspill32.opcodes(vec![0x0f, 0x11])); + e.enc_32_64(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE)); // Fill let bound_fill = fill.bind_vector_from_lane(ty, sse_vector_size); - e.enc_32_64(bound_fill, rec_ffillSib32.opcodes(vec![0x0f, 0x10])); + e.enc_32_64(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD)); let bound_regfill = regfill.bind_vector_from_lane(ty, sse_vector_size); - e.enc_32_64(bound_regfill, rec_fregfill32.opcodes(vec![0x0f, 0x10])); + e.enc_32_64(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD)); let bound_fill_nop = fill_nop.bind_vector_from_lane(ty, sse_vector_size); e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0); // Regmove let bound_regmove = regmove.bind_vector_from_lane(ty, sse_vector_size); - e.enc_32_64(bound_regmove, rec_frmov.opcodes(vec![0x0f, 0x28])); + e.enc_32_64(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD)); // Copy let bound_copy = copy.bind_vector_from_lane(ty, sse_vector_size); - e.enc_32_64(bound_copy, rec_furm.opcodes(vec![0x0f, 0x28])); // MOVAPS from SSE + e.enc_32_64(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD)); let bound_copy_nop = copy_nop.bind_vector_from_lane(ty, sse_vector_size); e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0); } // SIMD integer addition - for (ty, opcodes) in &[ - (I8, &[0x66, 0x0f, 0xfc]), // PADDB from SSE2 - (I16, &[0x66, 0x0f, 0xfd]), // PADDW from SSE2 - (I32, &[0x66, 0x0f, 0xfe]), // PADDD from SSE2 - (I64, &[0x66, 0x0f, 0xd4]), // PADDQ from 
 
     // SIMD integer addition
-    for (ty, opcodes) in &[
-        (I8, &[0x66, 0x0f, 0xfc]),  // PADDB from SSE2
-        (I16, &[0x66, 0x0f, 0xfd]), // PADDW from SSE2
-        (I32, &[0x66, 0x0f, 0xfe]), // PADDD from SSE2
-        (I64, &[0x66, 0x0f, 0xd4]), // PADDQ from SSE2
-    ] {
+    for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
         let iadd = iadd.bind_vector_from_lane(ty.clone(), sse_vector_size);
-        e.enc_32_64(iadd, rec_fa.opcodes(opcodes.to_vec()));
+        e.enc_32_64(iadd, rec_fa.opcodes(*opcodes));
     }
 
     // SIMD icmp using PCMPEQ*
-    let mut pcmpeq_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
-        HashMap::new();
-    pcmpeq_mapping.insert(8, (vec![0x66, 0x0f, 0x74], None)); // PCMPEQB from SSE2
-    pcmpeq_mapping.insert(16, (vec![0x66, 0x0f, 0x75], None)); // PCMPEQW from SSE2
-    pcmpeq_mapping.insert(32, (vec![0x66, 0x0f, 0x76], None)); // PCMPEQD from SSE2
-    pcmpeq_mapping.insert(64, (vec![0x66, 0x0f, 0x38, 0x29], Some(use_sse41_simd))); // PCMPEQQ from SSE4.1
+    let mut pcmpeq_mapping: HashMap<u64, (&'static [u8], Option<SettingPredicateNumber>)> =
+        HashMap::new();
+    pcmpeq_mapping.insert(8, (&PCMPEQB, None));
+    pcmpeq_mapping.insert(16, (&PCMPEQW, None));
+    pcmpeq_mapping.insert(32, (&PCMPEQD, None));
+    pcmpeq_mapping.insert(64, (&PCMPEQQ, Some(use_sse41_simd)));
     for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
         if let Some((opcodes, isa_predicate)) = pcmpeq_mapping.get(&ty.lane_bits()) {
             let instruction = icmp.bind_vector_from_lane(ty, sse_vector_size);
             let f_int_compare = formats.get(formats.by_name("IntCompare"));
             let has_eq_condition_code =
                 InstructionPredicate::new_has_condition_code(f_int_compare, IntCC::Equal, "cond");
-            let template = rec_icscc_fpr.nonrex().opcodes(opcodes.clone());
+            let template = rec_icscc_fpr.nonrex().opcodes(*opcodes);
             e.enc_32_64_func(instruction, template, |builder| {
                 let builder = builder.inst_predicate(has_eq_condition_code);
                 if let Some(p) = isa_predicate {
@@ -2088,13 +1967,13 @@ pub(crate) fn define(
     // Reference type instructions
 
     // Null references implemented as iconst 0.
-    e.enc32(null.bind_ref(R32), rec_pu_id_ref.opcodes(vec![0xb8]));
+    e.enc32(null.bind_ref(R32), rec_pu_id_ref.opcodes(&MOV_IMM));
 
-    e.enc64(null.bind_ref(R64), rec_pu_id_ref.rex().opcodes(vec![0xb8]));
-    e.enc64(null.bind_ref(R64), rec_pu_id_ref.opcodes(vec![0xb8]));
+    e.enc64(null.bind_ref(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM));
+    e.enc64(null.bind_ref(R64), rec_pu_id_ref.opcodes(&MOV_IMM));
 
     // is_null, implemented by testing whether the value is 0.
-    e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(vec![0x85]));
+    e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG));
 
     // safepoint instruction calls sink, no actual encoding.
     e.enc32_rec(safepoint, rec_safepoint, 0);
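The reworked `pcmpeq_mapping` keys lane width to borrowed opcode bytes plus an optional ISA predicate, so only the SSE4.1 entry carries a gate. A self-contained sketch of the same shape, with a plain `&str` standing in for the real settings predicate (an assumption for illustration, not the actual type):

```rust
use std::collections::HashMap;

static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];

fn main() {
    // Lane width -> (opcode bytes, optional ISA predicate). The annotation
    // forces both array borrows to coerce to `&'static [u8]`.
    let mut pcmpeq_mapping: HashMap<u64, (&'static [u8], Option<&str>)> = HashMap::new();
    pcmpeq_mapping.insert(8, (&PCMPEQB, None));
    pcmpeq_mapping.insert(64, (&PCMPEQQ, Some("use_sse41_simd")));

    if let Some((opcodes, isa_predicate)) = pcmpeq_mapping.get(&64) {
        assert_eq!(*opcodes, &PCMPEQQ[..]);
        assert_eq!(*isa_predicate, Some("use_sse41_simd")); // gated on SSE4.1
    }
}
```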
diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs
index 6ced509396..e18c54ad64 100644
--- a/cranelift/codegen/meta/src/isa/x86/mod.rs
+++ b/cranelift/codegen/meta/src/isa/x86/mod.rs
@@ -9,6 +9,7 @@ use crate::shared::Definitions as SharedDefinitions;
 mod encodings;
 mod instructions;
 mod legalize;
+mod opcodes;
 mod recipes;
 mod registers;
 mod settings;
diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
new file mode 100644
index 0000000000..12b60e532b
--- /dev/null
+++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
@@ -0,0 +1,377 @@
+//! Static, named definitions of instruction opcodes.
+
+/// Empty opcode for use as a default.
+pub static EMPTY: [u8; 0] = [];
+
+/// Add with carry flag r{16,32,64} to r/m of the same size.
+pub static ADC: [u8; 1] = [0x11];
+
+/// Add r{16,32,64} to r/m of the same size.
+pub static ADD: [u8; 1] = [0x01];
+
+/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended.
+pub static ADD_IMM: [u8; 1] = [0x81];
+
+/// Add sign-extended imm8 to r/m{16,32,64}.
+pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
+
+/// Add the low double-precision floating-point value from xmm2/mem to xmm1
+/// and store the result in xmm1.
+pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58];
+
+/// Add the low single-precision floating-point value from xmm2/mem to xmm1
+/// and store the result in xmm1.
+pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58];
+
+/// r/m{16,32,64} AND register of the same size (Intel docs have a typo).
+pub static AND: [u8; 1] = [0x21];
+
+/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended.
+pub static AND_IMM: [u8; 1] = [0x81];
+
+/// r/m{16,32,64} AND sign-extended imm8.
+pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
+
+/// Return the bitwise logical AND NOT of packed single-precision floating-point
+/// values in xmm1 and xmm2/mem.
+pub static ANDNPS: [u8; 2] = [0x0f, 0x55];
+
+/// Return the bitwise logical AND of packed single-precision floating-point values
+/// in xmm1 and xmm2/mem.
+pub static ANDPS: [u8; 2] = [0x0f, 0x54];
+
+/// Bit scan forward (stores index of first encountered 1 from the front).
+pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
+
+/// Bit scan reverse (stores index of first encountered 1 from the back).
+pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];
+
+/// Call near, relative, displacement relative to next instruction (sign-extended).
+pub static CALL_RELATIVE: [u8; 1] = [0xe8];
+
+/// Move r/m{16,32,64} if overflow (OF=1).
+pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40];
+
+/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64).
+pub static CMP_IMM: [u8; 1] = [0x81];
+
+/// Compare imm8 with r/m{16,32,64}.
+pub static CMP_IMM8: [u8; 1] = [0x83];
+
+/// Compare r{16,32,64} with r/m of the same size.
+pub static CMP_REG: [u8; 1] = [0x39];
+
+/// Convert scalar double-precision floating-point value to scalar single-precision
+/// floating-point value.
+pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a];
+
+/// Convert doubleword integer to scalar double-precision floating-point value.
+pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a];
+
+/// Convert doubleword integer to scalar single-precision floating-point value.
+pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a];
+
+/// Convert scalar single-precision floating-point value to scalar double-precision
+/// floating-point value.
+pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a];
+
+/// Convert with truncation scalar double-precision floating-point value to signed
+/// integer.
+pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c];
+
+/// Convert with truncation scalar single-precision floating-point value to integer.
+pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c];
+
+/// Unsigned divide for {16,32,64}-bit.
+pub static DIV: [u8; 1] = [0xf7];
+
+/// Divide low double-precision floating-point value in xmm1 by low double-precision
+/// floating-point value in xmm2/m64.
+pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e];
+
+/// Divide low single-precision floating-point value in xmm1 by low single-precision
+/// floating-point value in xmm2/m32.
+pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e];
+
+/// Signed divide for {16,32,64}-bit.
+pub static IDIV: [u8; 1] = [0xf7];
+
+/// Signed multiply for {16,32,64}-bit, generic registers.
+pub static IMUL: [u8; 2] = [0x0f, 0xaf];
+
+/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX.
+pub static IMUL_RDX_RAX: [u8; 1] = [0xf7];
+
+/// Insert scalar single-precision floating-point value.
+pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21];
+
+/// Either:
+/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory.
+/// 2. Jump far, absolute indirect, address given in m16:64.
+pub static JUMP_ABSOLUTE: [u8; 1] = [0xff];
+
+/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits.
+pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9];
+
+/// Jump near (rel32) if overflow (OF=1).
+pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80];
+
+/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits.
+pub static JUMP_SHORT: [u8; 1] = [0xeb];
+
+/// Jump short (rel8) if equal (ZF=1).
+pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74];
+
+/// Jump short (rel8) if not equal (ZF=0).
+pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75];
+
+/// Jump short (rel8) if overflow (OF=1).
+pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70];
+
+/// Store effective address for m in register r{16,32,64}.
+pub static LEA: [u8; 1] = [0x8d];
+
+/// Count the number of leading zero bits.
+pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd];
+
+/// Return the maximum scalar double-precision floating-point value between
+/// xmm2/m64 and xmm1.
+pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f];
+
+/// Return the maximum scalar single-precision floating-point value between
+/// xmm2/m32 and xmm1.
+pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f];
+
+/// Return the minimum scalar double-precision floating-point value between
+/// xmm2/m64 and xmm1.
+pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d];
+
+/// Return the minimum scalar single-precision floating-point value between
+/// xmm2/m32 and xmm1.
+pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d];
+
+/// Move r8 to r/m8.
+pub static MOV_BYTE_STORE: [u8; 1] = [0x88];
+
+/// Move imm{16,32,64} to same-sized register.
+pub static MOV_IMM: [u8; 1] = [0xb8];
+
+/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target.
+pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7];
+
+/// Move {r/m16, r/m32, r/m64} to same-sized register.
+pub static MOV_LOAD: [u8; 1] = [0x8b];
+
+/// Move r16 to r/m16.
+pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89];
+
+/// Move {r16, r32, r64} to same-sized register or memory.
+pub static MOV_STORE: [u8; 1] = [0x89];
+
+/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE).
+pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28];
+
+/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix.
+pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e];
+
+/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix.
+pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e];
+
+/// Move packed single-precision floating-point values low to high (SSE).
+pub static MOVLHPS: [u8; 2] = [0x0f, 0x16];
+
+/// Move scalar double-precision floating-point value (from reg/mem to reg).
+pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10];
+
+/// Move scalar double-precision floating-point value (from reg to reg/mem).
+pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11];
+
+/// Move scalar single-precision floating-point value (from reg/mem to reg).
+pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10];
+
+/// Move scalar single-precision floating-point value (from reg to reg/mem).
+pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11];
+
+/// Move byte to register with sign-extension.
+pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe];
+
+/// Move word to register with sign-extension.
+pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf];
+
+/// Move doubleword to register with sign-extension.
+pub static MOVSXD: [u8; 1] = [0x63];
+
+/// Move unaligned packed single-precision floating-point values from x/m to xmm (SSE).
+pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10];
+
+/// Move unaligned packed single-precision floating-point values from xmm to x/m (SSE).
+pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11];
+
+/// Move byte to register with zero-extension.
+pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6];
+
+/// Move word to register with zero-extension.
+pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7];
+
+/// Unsigned multiply for {16,32,64}-bit.
+pub static MUL: [u8; 1] = [0xf7];
+
+/// Multiply the low double-precision floating-point value in xmm2/m64 by the
+/// low double-precision floating-point value in xmm1.
+pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59];
+
+/// Multiply the low single-precision floating-point value in xmm2/m32 by the
+/// low single-precision floating-point value in xmm1.
+pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59];
+
+/// Invert each bit of r/m{16,32,64} (one's complement).
+pub static NOT: [u8; 1] = [0xf7];
+
+/// r{16,32,64} OR register of same size.
+pub static OR: [u8; 1] = [0x09];
+
+/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended.
+pub static OR_IMM: [u8; 1] = [0x81];
+
+/// r/m{16,32,64} OR sign-extended imm8.
+pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
+
+/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE).
+pub static ORPS: [u8; 2] = [0x0f, 0x56];
+
+/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
+pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
+
+/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2).
+pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe];
+
+/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2).
+pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4];
+
+/// Add packed word integers from xmm2/m128 and xmm1 (SSE2).
+pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd];
+
+/// Compare packed bytes for equality (SSE2).
+pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
+
+/// Compare packed doublewords for equality (SSE2).
+pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76];
+
+/// Compare packed quadwords for equality (SSE4.1).
+pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
+
+/// Compare packed words for equality (SSE2).
+pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
+
+/// Extract doubleword or quadword, depending on REX.W (SSE4.1).
+pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
+
+/// Extract byte (SSE4.1).
+pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
+
+/// Extract word (SSE2). There is a 4-byte SSE4.1 variant that can also move to m16.
+pub static PEXTRW_SSE2: [u8; 3] = [0x66, 0x0f, 0xc5];
+
+/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
+pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
+
+/// Insert byte (SSE4.1).
+pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
+
+/// Insert word (SSE2).
+pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
+
+/// Pop top of stack into r{16,32,64}; increment stack pointer.
+pub static POP_REG: [u8; 1] = [0x58];
+
+/// Return the count of bits set to 1.
+pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8];
+
+/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSSE3).
+pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
+
+/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and
+/// store the result in xmm1 (SSE2).
+pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
+
+/// Push r{16,32,64}.
+pub static PUSH_REG: [u8; 1] = [0x50];
+
+/// Logical exclusive OR (SSE2).
+pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef];
+
+/// Near return to calling procedure.
+pub static RET_NEAR: [u8; 1] = [0xc3];
+
+/// General rotation/shift of r/m{16,32,64} by CL; the operation is selected by the rrr bits.
+pub static ROTATE_CL: [u8; 1] = [0xd3];
+
+/// General rotation/shift of r/m{16,32,64} by imm8; the operation is selected by the rrr bits.
+pub static ROTATE_IMM8: [u8; 1] = [0xc1];
+
+/// Round scalar double-precision floating-point values.
+pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b];
+
+/// Round scalar single-precision floating-point values.
+pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a];
+
+/// Subtract with borrow r{16,32,64} from r/m of the same size.
+pub static SBB: [u8; 1] = [0x19];
+
+/// Set byte if overflow (OF=1).
+pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90];
+
+/// Compute square root of scalar double-precision floating-point value.
+pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51];
+
+/// Compute square root of scalar single-precision floating-point value.
+pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51];
+
+/// Subtract r{16,32,64} from r/m of same size.
+pub static SUB: [u8; 1] = [0x29];
+
+/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1
+/// and store the result in xmm1.
+pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c];
+
+/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1
+/// and store the result in xmm1.
+pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c];
+
+/// AND r8 with r/m8; set SF, ZF, PF according to result.
+pub static TEST_BYTE_REG: [u8; 1] = [0x84];
+
+/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result.
+pub static TEST_REG: [u8; 1] = [0x85];
+
+/// Count the number of trailing zero bits.
+pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc];
+
+/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64
+/// and set the EFLAGS flags accordingly.
+pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e];
+
+/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32
+/// and set the EFLAGS flags accordingly.
+pub static UCOMISS: [u8; 2] = [0x0f, 0x2e];
+
+/// Raise an invalid opcode exception (UD2).
+pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b];
+
+/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended.
+pub static XOR_IMM: [u8; 1] = [0x81];
+
+/// r/m{16,32,64} XOR sign-extended imm8.
+pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
+
+/// r/m{16,32,64} XOR register of the same size.
+pub static XOR: [u8; 1] = [0x31];
+
+/// r/m8 XOR r8.
+pub static XORB: [u8; 1] = [0x30];
+
+/// Bitwise logical XOR of packed double-precision floating-point values.
+pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57];
+
+/// Bitwise logical XOR of packed single-precision floating-point values.
+pub static XORPS: [u8; 2] = [0x0f, 0x57];
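Because the new module is pure data, its entries are easy to spot-check against the Intel SDM. A hypothetical unit test, not included in this patch, could live at the bottom of opcodes.rs:

```rust
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn spot_check_opcode_bytes() {
        assert!(EMPTY.is_empty()); // placeholder only, never encoded
        assert_eq!(ADD, [0x01]); // ADD r/m, r: 01 /r
        assert_eq!(PADDQ, [0x66, 0x0f, 0xd4]); // 66 0F D4 /r (SSE2)
        assert_eq!(PCMPEQQ, [0x66, 0x0f, 0x38, 0x29]); // 66 0F 38 29 /r (SSE4.1)
    }
}
```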
diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs
index 13d9dcf833..ce0999c04c 100644
--- a/cranelift/codegen/meta/src/isa/x86/recipes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs
@@ -10,6 +10,8 @@ use crate::cdsl::regs::IsaRegs;
 use crate::cdsl::settings::SettingGroup;
 use crate::shared::Definitions as SharedDefinitions;
 
+use crate::isa::x86::opcodes;
+
 /// Helper data structure to create recipes and template recipes.
 /// It contains all the recipes and recipe templates that might be used in the encodings crate of
 /// this same directory.
@@ -186,7 +188,7 @@ pub struct Template<'builder> {
     /// Value of the RRR bits (between 0 and 0b111).
     rrr_bits: u16,
     /// Opcode bytes.
-    op_bytes: Vec<u8>,
+    op_bytes: &'static [u8],
 }
 
 impl<'builder> Template<'builder> {
@@ -204,7 +206,7 @@ impl<'builder> Template<'builder> {
             rex: false,
             w_bit: 0,
             rrr_bits: 0,
-            op_bytes: Vec::new(),
+            op_bytes: &opcodes::EMPTY,
         }
     }
 
@@ -226,7 +228,7 @@ impl<'builder> Template<'builder> {
     }
 
     // Copy setters.
-    pub fn opcodes(&self, op_bytes: Vec<u8>) -> Self {
+    pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self {
         assert!(!op_bytes.is_empty());
         let mut copy = self.clone();
         copy.op_bytes = op_bytes;
         copy
     }
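For context on why the setter now takes `&'static [u8]`: each call site previously allocated a fresh `Vec<u8>`, and every `clone()` of a template copied that allocation; borrowing a static turns the clone into a plain pointer-and-length copy. A reduced standalone sketch of this copy-setter pattern (hypothetical `Template`, not the real builder):

```rust
static EMPTY: [u8; 0] = [];
static ADD: [u8; 1] = [0x01];

#[derive(Clone)]
struct Template {
    op_bytes: &'static [u8],
}

impl Template {
    fn new() -> Self {
        // The shared EMPTY static stands in for "no opcode chosen yet".
        Template { op_bytes: &EMPTY }
    }

    // Copy setter: returns an adjusted clone, leaving the original usable
    // as a base for further variants. Cloning copies only a fat pointer.
    fn opcodes(&self, op_bytes: &'static [u8]) -> Self {
        assert!(!op_bytes.is_empty());
        let mut copy = self.clone();
        copy.op_bytes = op_bytes;
        copy
    }
}

fn main() {
    let base = Template::new();
    let add = base.opcodes(&ADD);
    assert_eq!(add.op_bytes, &ADD[..]);
    assert!(base.op_bytes.is_empty()); // base is unchanged
}
```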