From e425bfcebd15be73e5588f84c22fcad8c49e7e41 Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Wed, 1 Apr 2020 10:06:12 -0700 Subject: [PATCH] Infer REX prefixes for SIMD load and store with displacement --- .../codegen/meta/src/isa/x86/encodings.rs | 8 +++--- cranelift/codegen/meta/src/isa/x86/recipes.rs | 6 +++-- cranelift/codegen/src/isa/x86/enc_tables.rs | 15 +++++++++++ .../isa/x86/simd-memory-binemit.clif | 25 ++++++++++++++++--- 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 99402fc9d5..258d4a68aa 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1866,8 +1866,8 @@ fn define_simd( // Store let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any); e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE)); - e.enc_both(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE)); - e.enc_both(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE)); + e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE)); + e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE)); // Store complex let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size)); @@ -1887,8 +1887,8 @@ fn define_simd( // Load let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any); e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD)); - e.enc_both(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); - e.enc_both(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); + e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); + e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); // Load complex let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size)); diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs index 
eb83eb15ba..439724803a 100644 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -1604,7 +1604,7 @@ pub(crate) fn define<'shared>( ); // XX /r register-indirect store with 8-bit offset of FPR. - recipes.add_template_recipe( + recipes.add_template_inferred( EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2) .operands_in(vec![fpr, gpr]) .inst_predicate(has_small_offset) @@ -1626,6 +1626,7 @@ pub(crate) fn define<'shared>( sink.put1(offset as u8); "#, ), + "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", ); // XX /r register-indirect store with 32-bit offset. @@ -1682,7 +1683,7 @@ pub(crate) fn define<'shared>( ); // XX /r register-indirect store with 32-bit offset of FPR. - recipes.add_template_recipe( + recipes.add_template_inferred( EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5) .operands_in(vec![fpr, gpr]) .clobbers_flags(false) @@ -1703,6 +1704,7 @@ pub(crate) fn define<'shared>( sink.put4(offset as u32); "#, ), + "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", ); } diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index e0ec976fdd..2bffd9e167 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -139,6 +139,21 @@ fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1( + if needs_rex { 1 } else { 0 } } +/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1) +/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB. 
+fn size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1( + sizing: &RecipeSizing, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + let needs_rex = (EncodingBits::from(enc.bits()).rex_w() != 0) + || test_input(0, inst, divert, func, is_extended_reg) + || test_input(1, inst, divert, func, is_extended_reg); + size_plus_maybe_sib_for_inreg_1(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 } +} + /// Calculates the size while inferring if the first input register (inreg0) and first output /// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a /// SIB or offset. diff --git a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif index a5d649125f..f9c7f1d485 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif @@ -7,10 +7,6 @@ block0(v0: i64 [%rax]): [-, %xmm0] v10 = load.i32x4 v0 ; bin: heap_oob 0f 10 00 [-] store v10, v0 ; bin: heap_oob 0f 11 00 - ; use displacement -[-, %xmm0] v11 = load.f32x4 v0+42 ; bin: heap_oob 40 0f 10 40 2a -[-] store v11, v0+42 ; bin: heap_oob 40 0f 11 40 2a - ; use REX prefix [-, %xmm8] v12 = load.i8x16 v0 ; bin: heap_oob 44 0f 10 00 [-] store v12, v0 ; bin: heap_oob 44 0f 11 00 @@ -18,6 +14,27 @@ block0(v0: i64 [%rax]): return } +function %load_store_with_displacement(i64) { +block0(v0: i64 [%rax]): + ; use 8-bit displacement +[-, %xmm0] v1 = load.f32x4 v0+42 ; bin: heap_oob 0f 10 40 2a +[-] store v1, v0+42 ; bin: heap_oob 0f 11 40 2a + + ; use 8-bit displacement with REX prefix +[-, %xmm8] v2 = load.i8x16 v0+42 ; bin: heap_oob 44 0f 10 40 2a +[-] store v2, v0+42 ; bin: heap_oob 44 0f 11 40 2a + + ; use 32-bit displacement +[-, %xmm0] v3 = load.f32x4 v0+256 ; bin: heap_oob 0f 10 80 00000100 +[-] store v3, v0+256 ; bin: heap_oob 0f 11 80 00000100 + + ; use 32-bit displacement with REX
prefix +[-, %xmm8] v4 = load.f32x4 v0+256 ; bin: heap_oob 44 0f 10 80 00000100 +[-] store v4, v0+256 ; bin: heap_oob 44 0f 11 80 00000100 + + return +} + function %load_store_complex(i64, i64) { block0(v0: i64 [%rax], v1: i64 [%rbx]): ; %xmm1 corresponds to ModR/M 0x04; the 0b100 in the R/M slot indicates a SIB byte follows