From 99380fad1af9c029ae2b77c083eb123bc2dc429b Mon Sep 17 00:00:00 2001 From: Andy Wortman Date: Mon, 16 Sep 2019 14:35:55 +0000 Subject: [PATCH] Use 'xor r, r' to set registers to 0 instead of mov (#766) --- .../codegen/meta/src/cdsl/instructions.rs | 16 +++++++ .../codegen/meta/src/isa/x86/encodings.rs | 30 +++++++++++++ cranelift/codegen/meta/src/isa/x86/recipes.rs | 12 +++++ cranelift/codegen/src/predicates.rs | 6 +++ .../floating-point-zero-constants-32bit.clif | 17 +++++++ .../x86/floating-point-zero-constants.clif | 31 +++++++++++++ .../filetests/isa/x86/isub_imm-i8.clif | 8 ++-- .../x86/optimized-zero-constants-32bit.clif | 36 ++++++++++++++- .../isa/x86/optimized-zero-constants.clif | 44 ++++++++++++++++++- 9 files changed, 193 insertions(+), 7 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif create mode 100644 cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif diff --git a/cranelift/codegen/meta/src/cdsl/instructions.rs b/cranelift/codegen/meta/src/cdsl/instructions.rs index f061be94ed..2737099cc4 100644 --- a/cranelift/codegen/meta/src/cdsl/instructions.rs +++ b/cranelift/codegen/meta/src/cdsl/instructions.rs @@ -606,6 +606,8 @@ pub enum FormatPredicateKind { /// `2^scale`. IsUnsignedInt(usize, usize), + /// Is the immediate format field member an integer equal to zero? + IsZeroInt, /// Is the immediate format field member equal to zero? 
(float32 version) IsZero32BitFloat, @@ -679,6 +681,9 @@ impl FormatPredicateNode { "predicates::is_unsigned_int({}, {}, {})", self.member_name, width, scale ), + FormatPredicateKind::IsZeroInt => { + format!("predicates::is_zero_int({})", self.member_name) + } FormatPredicateKind::IsZero32BitFloat => { format!("predicates::is_zero_32_bit_float({})", self.member_name) } @@ -891,6 +896,17 @@ impl InstructionPredicate { )) } + pub fn new_is_zero_int( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZeroInt, + )) + } + pub fn new_is_zero_32bit_float( format: &InstructionFormat, field_name: &'static str, diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index f16ca93717..9bdb751b26 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -614,6 +614,7 @@ pub(crate) fn define( let rec_trapif = r.recipe("trapif"); let rec_trapff = r.recipe("trapff"); let rec_u_id = r.template("u_id"); + let rec_u_id_z = r.template("u_id_z"); let rec_umr = r.template("umr"); let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa"); let rec_ur = r.template("ur"); @@ -750,6 +751,35 @@ pub(crate) fn define( } e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(vec![0xb8]).rex()); + let is_zero_int = InstructionPredicate::new_is_zero_int(f_unary_imm, "imm"); + e.enc_both_instp( + iconst.bind(I8), + rec_u_id_z.opcodes(vec![0x30]), + is_zero_int.clone(), + ); + // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that + // encodings should be on 16-bit operands (f.ex, "xor %ax, %ax"). Cranelift currently does not + // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these + // scenarios, so we explicitly select a wider but permissible opcode. 
+ // + // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't + // an appropriate i16 encoding available. + e.enc_both_instp( + iconst.bind(I16), + rec_u_id_z.opcodes(vec![0x31]), + is_zero_int.clone(), + ); + e.enc_both_instp( + iconst.bind(I32), + rec_u_id_z.opcodes(vec![0x31]), + is_zero_int.clone(), + ); + e.enc_x86_64_instp( + iconst.bind(I64), + rec_u_id_z.opcodes(vec![0x31]), + is_zero_int, + ); + // Shifts and rotates. // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit // and 16-bit shifts would need explicit masking. diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs index 8176effc42..11a9972d98 100644 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -1023,6 +1023,18 @@ pub(crate) fn define<'shared>( ), ); + // XX /r unary with zero immediate, encoded as `op reg, reg` (no immediate bytes are emitted). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("u_id_z", f_unary_imm, 1) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + // XX /n Unary with floating point 32-bit immediate equal to zero. { let format = formats.get(f_unary_ieee32); diff --git a/cranelift/codegen/src/predicates.rs b/cranelift/codegen/src/predicates.rs index 978e733c66..f900546111 100644 --- a/cranelift/codegen/src/predicates.rs +++ b/cranelift/codegen/src/predicates.rs @@ -11,6 +11,12 @@ use crate::ir; +/// Check that an integer value is zero. +#[allow(dead_code)] +pub fn is_zero_int<T: Into<i64>>(x: T) -> bool { + x.into() == 0 +} + /// Check that a 64-bit floating point value is zero. 
#[allow(dead_code)] pub fn is_zero_64_bit_float>(x: T) -> bool { diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif new file mode 100644 index 0000000000..8021375558 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif @@ -0,0 +1,17 @@ +; Check that floating-point and integer constants equal to zero are optimized correctly. +test binemit +target i686 + +function %foo() -> f32 fast { +ebb0: + ; asm: xorps %xmm0, %xmm0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 + return v0 +} + +function %bar() -> f64 fast { +ebb0: + ; asm: xorpd %xmm0, %xmm0 + [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif new file mode 100644 index 0000000000..049320870e --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif @@ -0,0 +1,31 @@ +; Check that floating-point constants equal to zero are optimized correctly. 
+test binemit +target x86_64 + +function %zero_const_32bit_no_rex() -> f32 fast { +ebb0: + ; asm: xorps %xmm0, %xmm0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0 + return v0 +} + +function %zero_const_32bit_rex() -> f32 fast { +ebb0: + ; asm: xorps %xmm8, %xmm8 + [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 + return v1 +} + +function %zero_const_64bit_no_rex() -> f64 fast { +ebb0: + ; asm: xorpd %xmm0, %xmm0 + [-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0 + return v0 +} + +function %zero_const_64bit_rex() -> f64 fast { +ebb0: + ; asm: xorpd %xmm8, %xmm8 + [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 + return v1 +} diff --git a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif index 8958b1afa4..1ca70ebbbe 100644 --- a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif +++ b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif @@ -5,9 +5,9 @@ function u0:0(i8) -> i8 fast { ebb0(v0: i8): v1 = iconst.i8 0 v2 = isub v1, v0 - ; check: v4 = uextend.i32 v0 - ; nextln: v6 = iconst.i32 0 - ; nextln = isub v6, v4 - ; nextln = ireduce.i8 v5 + ; check: v3 = uextend.i32 v0 + ; nextln: v5 = iconst.i32 0 + ; nextln = isub v5, v3 + ; nextln = ireduce.i8 v4 return v2 } diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif index c90e455527..21f936c4b9 100644 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif +++ b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif @@ -1,5 +1,6 @@ -; Check that floating-point constants equal to zero are optimized correctly. +; Check that floating-point and integer constants equal to zero are optimized correctly. 
test binemit +set opt_level=best target i686 function %foo() -> f32 fast { @@ -16,3 +17,36 @@ ebb0: return v1 } +function %zero_dword() -> i32 fast { +ebb0: + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i32 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i32 0 ; bin: 31 ff + return v0 +} + +function %zero_word() -> i16 fast { +ebb0: + ; while you may expect this to be encoded like 6631c0, aka + ; xor %ax, %ax, the upper 16 bits of the register used for + ; i16 are left undefined, so it's not wrong to clear them. + ; + ; discarding the 66 prefix is shorter, so this test expects + ; that we do so. + ; + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff + return v0 +} + +function %zero_byte() -> i8 fast { +ebb0: + ; asm: xor %al, %al + [-,%rax] v0 = iconst.i8 0 ; bin: 30 c0 + ; asm: xor %bh, %bh + [-,%rdi] v1 = iconst.i8 0 ; bin: 30 ff + return v0 +} diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif index 44060e9b97..4a1ad00ff4 100644 --- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif +++ b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif @@ -1,11 +1,12 @@ ; Check that floating-point constants equal to zero are optimized correctly. 
test binemit +set opt_level=best target x86_64 function %zero_const_32bit_no_rex() -> f32 fast { ebb0: ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 return v0 } @@ -19,7 +20,7 @@ ebb0: function %zero_const_64bit_no_rex() -> f64 fast { ebb0: ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0 + [-,%xmm0] v0 = f64const 0.0 ; bin: 66 0f 57 c0 return v0 } @@ -30,3 +31,42 @@ ebb0: return v1 } +function %imm_zero_register() -> i64 fast { +ebb0: + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i64 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i64 0 ; bin: 31 ff + ; asm: xor %r8d, %r8d + [-,%r8] v2 = iconst.i64 0 ; bin: 45 31 c0 + ; asm: xor %r15d, %r15d + [-,%r15] v4 = iconst.i64 0 ; bin: 45 31 ff + return v0 +} + +function %zero_word() -> i16 fast { +ebb0: + ; while you may expect this to be encoded like 6631c0, aka + ; xor %ax, %ax, the upper 16 bits of the register used for + ; i16 are left undefined, so it's not wrong to clear them. + ; + ; discarding the 66 prefix is shorter, so this test expects + ; that we do so. + ; + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff + return v0 +} + +function %zero_byte() -> i8 fast { +ebb0: + ; asm: xor %r15b, %r15b + [-,%r15] v0 = iconst.i8 0 ; bin: 45 30 ff + ; asm: xor %al, %al + [-,%rax] v1 = iconst.i8 0 ; bin: 30 c0 + ; asm: xor %bh, %bh + [-,%rdi] v2 = iconst.i8 0 ; bin: 30 ff + return v0 +}