diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 7e9f74475e..f24232f39a 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1835,7 +1835,10 @@ fn define_simd( // Load let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any); - e.enc_32_64(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD)); + e.enc_32_64( + bound_load.clone(), + rec_fld.opcodes(&MOVUPS_LOAD).infer_rex(), + ); e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); e.enc_32_64(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs index c9794a02da..5c7cb0519b 100644 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -2002,7 +2002,7 @@ pub(crate) fn define<'shared>( ); // XX /r float load with no offset. - recipes.add_template_recipe( + recipes.add_template_inferred( EncodingRecipeBuilder::new("fld", &formats.load, 1) .operands_in(vec![gpr]) .operands_out(vec![fpr]) @@ -2026,6 +2026,7 @@ pub(crate) fn define<'shared>( } "#, ), + "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", ); let has_small_offset = diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index 27f7ed43db..076fef3115 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -139,6 +139,23 @@ fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1( + if needs_rex { 1 } else { 0 } } +/// Calculates the size while inferring if the first input register (inreg0) and first output +/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a +/// SIB or offset. +fn size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg) + || test_result(0, inst, divert, func, is_extended_reg); + size_plus_maybe_sib_or_offset_for_inreg_0(sizing, enc, inst, divert, func) + + if needs_rex { 1 } else { 0 } +} + /// Infers whether a dynamic REX prefix will be emitted, for use with one input reg. /// /// A REX prefix is known to be emitted if either: diff --git a/cranelift/filetests/filetests/isa/x86/binary64.clif b/cranelift/filetests/filetests/isa/x86/binary64.clif index c241d33769..ab5d516b40 100644 --- a/cranelift/filetests/filetests/isa/x86/binary64.clif +++ b/cranelift/filetests/filetests/isa/x86/binary64.clif @@ -1682,8 +1682,11 @@ block0: function %V128() { block0: - [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 - [-, %xmm9] v4 = vconst.i32x4 [0 1 2 3] ; bin: 44 0f 10 0d 00000005 PCRelRodata4(23) - store v4, v3 ; bin: heap_oob 45 0f 11 0a + [-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4 + [-, %xmm9] v4 = vconst.i32x4 [0 1 2 3] ; bin: 44 0f 10 0d 0000000f PCRelRodata4(33) + store v4, v3 ; bin: heap_oob 45 0f 11 0a + + [-, %r11] v5 = iconst.i64 0x1234 + [-, %xmm2] v6 = load.i32x4 v5 ; bin: heap_oob 41 0f 10 13 return }