Use 'xor r, r' to set registers to 0 instead of mov (#766)

This commit is contained in:
Andy Wortman
2019-09-16 14:35:55 +00:00
committed by Benjamin Bouvier
parent b95508c51a
commit 99380fad1a
9 changed files with 193 additions and 7 deletions

View File

@@ -606,6 +606,8 @@ pub enum FormatPredicateKind {
/// `2^scale`. /// `2^scale`.
IsUnsignedInt(usize, usize), IsUnsignedInt(usize, usize),
/// Is the immediate format field member an integer equal to zero?
IsZeroInt,
/// Is the immediate format field member equal to zero? (float32 version) /// Is the immediate format field member equal to zero? (float32 version)
IsZero32BitFloat, IsZero32BitFloat,
@@ -679,6 +681,9 @@ impl FormatPredicateNode {
"predicates::is_unsigned_int({}, {}, {})", "predicates::is_unsigned_int({}, {}, {})",
self.member_name, width, scale self.member_name, width, scale
), ),
FormatPredicateKind::IsZeroInt => {
format!("predicates::is_zero_int({})", self.member_name)
}
FormatPredicateKind::IsZero32BitFloat => { FormatPredicateKind::IsZero32BitFloat => {
format!("predicates::is_zero_32_bit_float({})", self.member_name) format!("predicates::is_zero_32_bit_float({})", self.member_name)
} }
@@ -891,6 +896,17 @@ impl InstructionPredicate {
)) ))
} }
/// Builds a format predicate that tests whether the immediate field `field_name` of `format`
/// is an integer equal to zero.
pub fn new_is_zero_int(
    format: &InstructionFormat,
    field_name: &'static str,
) -> InstructionPredicateNode {
    let zero_check = FormatPredicateNode::new(format, field_name, FormatPredicateKind::IsZeroInt);
    InstructionPredicateNode::FormatPredicate(zero_check)
}
pub fn new_is_zero_32bit_float( pub fn new_is_zero_32bit_float(
format: &InstructionFormat, format: &InstructionFormat,
field_name: &'static str, field_name: &'static str,

View File

@@ -614,6 +614,7 @@ pub(crate) fn define(
let rec_trapif = r.recipe("trapif"); let rec_trapif = r.recipe("trapif");
let rec_trapff = r.recipe("trapff"); let rec_trapff = r.recipe("trapff");
let rec_u_id = r.template("u_id"); let rec_u_id = r.template("u_id");
let rec_u_id_z = r.template("u_id_z");
let rec_umr = r.template("umr"); let rec_umr = r.template("umr");
let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa"); let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa");
let rec_ur = r.template("ur"); let rec_ur = r.template("ur");
@@ -750,6 +751,35 @@ pub(crate) fn define(
} }
e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(vec![0xb8]).rex()); e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(vec![0xb8]).rex());
let is_zero_int = InstructionPredicate::new_is_zero_int(f_unary_imm, "imm");
e.enc_both_instp(
iconst.bind(I8),
rec_u_id_z.opcodes(vec![0x30]),
is_zero_int.clone(),
);
// You might expect i16 encodings to carry a 0x66 prefix on the opcode to indicate that the
// operation acts on 16-bit operands (e.g. "xor %ax, %ax"). Cranelift currently does not
// know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these
// scenarios, so we explicitly select a wider but permissible opcode.
//
// This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't
// an appropriate i16 encoding available.
e.enc_both_instp(
iconst.bind(I16),
rec_u_id_z.opcodes(vec![0x31]),
is_zero_int.clone(),
);
e.enc_both_instp(
iconst.bind(I32),
rec_u_id_z.opcodes(vec![0x31]),
is_zero_int.clone(),
);
e.enc_x86_64_instp(
iconst.bind(I64),
rec_u_id_z.opcodes(vec![0x31]),
is_zero_int,
);
// Shifts and rotates. // Shifts and rotates.
// Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
// and 16-bit shifts would need explicit masking. // and 16-bit shifts would need explicit masking.

View File

@@ -1023,6 +1023,18 @@ pub(crate) fn define<'shared>(
), ),
); );
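// XX /r unary with zero immediate: emits the opcode followed by a register-register ModR/M
// byte naming the output register as both operands; no immediate bytes are emitted.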
recipes.add_template_recipe(
EncodingRecipeBuilder::new("u_id_z", f_unary_imm, 1)
.operands_out(vec![gpr])
.emit(
r#"
{{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
modrm_rr(out_reg0, out_reg0, sink);
"#,
),
);
// XX /n Unary with floating point 32-bit immediate equal to zero. // XX /n Unary with floating point 32-bit immediate equal to zero.
{ {
let format = formats.get(f_unary_ieee32); let format = formats.get(f_unary_ieee32);

View File

@@ -11,6 +11,12 @@
use crate::ir; use crate::ir;
/// Returns `true` when `x`, once converted to an `i64`, is equal to zero.
// `dead_code` is allowed presumably because this helper is only referenced by name from
// generated predicate code (`predicates::is_zero_int(...)`) — TODO confirm.
#[allow(dead_code)]
pub fn is_zero_int<T>(x: T) -> bool
where
    T: Into<i64>,
{
    let value: i64 = x.into();
    value == 0
}
/// Check that a 64-bit floating point value is zero. /// Check that a 64-bit floating point value is zero.
#[allow(dead_code)] #[allow(dead_code)]
pub fn is_zero_64_bit_float<T: Into<ir::immediates::Ieee64>>(x: T) -> bool { pub fn is_zero_64_bit_float<T: Into<ir::immediates::Ieee64>>(x: T) -> bool {

View File

@@ -0,0 +1,17 @@
; Check that floating-point and integer constants equal to zero are optimized correctly.
test binemit
target i686
function %foo() -> f32 fast {
ebb0:
; asm: xorps %xmm0, %xmm0
[-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0
return v0
}
function %bar() -> f64 fast {
ebb0:
; asm: xorpd %xmm0, %xmm0
[-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0
return v1
}

View File

@@ -0,0 +1,31 @@
; Check that floating-point constants equal to zero are optimized correctly.
test binemit
target x86_64
function %zero_const_32bit_no_rex() -> f32 fast {
ebb0:
; asm: xorps %xmm0, %xmm0
[-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0
return v0
}
function %zero_const_32bit_rex() -> f32 fast {
ebb0:
; asm: xorps %xmm8, %xmm8
[-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0
return v1
}
function %zero_const_64bit_no_rex() -> f64 fast {
ebb0:
; asm: xorpd %xmm0, %xmm0
[-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0
return v0
}
function %zero_const_64bit_rex() -> f64 fast {
ebb0:
; asm: xorpd %xmm8, %xmm8
[-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0
return v1
}

View File

@@ -5,9 +5,9 @@ function u0:0(i8) -> i8 fast {
ebb0(v0: i8): ebb0(v0: i8):
v1 = iconst.i8 0 v1 = iconst.i8 0
v2 = isub v1, v0 v2 = isub v1, v0
; check: v4 = uextend.i32 v0 ; check: v3 = uextend.i32 v0
; nextln: v6 = iconst.i32 0 ; nextln: v5 = iconst.i32 0
; nextln = isub v6, v4 ; nextln = isub v5, v3
; nextln = ireduce.i8 v5 ; nextln = ireduce.i8 v4
return v2 return v2
} }

View File

@@ -1,5 +1,6 @@
; Check that floating-point constants equal to zero are optimized correctly. ; Check that floating-point and integer constants equal to zero are optimized correctly.
test binemit test binemit
set opt_level=best
target i686 target i686
function %foo() -> f32 fast { function %foo() -> f32 fast {
@@ -16,3 +17,36 @@ ebb0:
return v1 return v1
} }
function %zero_dword() -> i32 fast {
ebb0:
; asm: xor %eax, %eax
[-,%rax] v0 = iconst.i32 0 ; bin: 31 c0
; asm: xor %edi, %edi
[-,%rdi] v1 = iconst.i32 0 ; bin: 31 ff
return v0
}
function %zero_word() -> i16 fast {
ebb0:
; while you may expect this to be encoded like 6631c0, aka
; xor %ax, %ax, the upper 16 bits of the register used for
; i16 are left undefined, so it's not wrong to clear them.
;
; discarding the 66 prefix is shorter, so this test expects
; that we do so.
;
; asm: xor %eax, %eax
[-,%rax] v0 = iconst.i16 0 ; bin: 31 c0
; asm: xor %edi, %edi
[-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff
return v0
}
function %zero_byte() -> i8 fast {
ebb0:
; asm: xor %al, %al
[-,%rax] v0 = iconst.i8 0 ; bin: 30 c0
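; NOTE(review): without a REX prefix, 8-bit register number 7 is %bh, so `30 ff` does not
; clear the low byte of %edi (which is not byte-addressable in 32-bit mode) — confirm the
; register class permitted for this encoding.
; asm: xor %bh, %bh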
[-,%rdi] v1 = iconst.i8 0 ; bin: 30 ff
return v0
}

View File

@@ -1,11 +1,12 @@
; Check that floating-point constants equal to zero are optimized correctly. ; Check that floating-point constants equal to zero are optimized correctly.
test binemit test binemit
set opt_level=best
target x86_64 target x86_64
function %zero_const_32bit_no_rex() -> f32 fast { function %zero_const_32bit_no_rex() -> f32 fast {
ebb0: ebb0:
; asm: xorps %xmm0, %xmm0 ; asm: xorps %xmm0, %xmm0
[-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0 [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0
return v0 return v0
} }
@@ -19,7 +20,7 @@ ebb0:
function %zero_const_64bit_no_rex() -> f64 fast { function %zero_const_64bit_no_rex() -> f64 fast {
ebb0: ebb0:
; asm: xorpd %xmm0, %xmm0 ; asm: xorpd %xmm0, %xmm0
[-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0 [-,%xmm0] v0 = f64const 0.0 ; bin: 66 0f 57 c0
return v0 return v0
} }
@@ -30,3 +31,42 @@ ebb0:
return v1 return v1
} }
function %imm_zero_register() -> i64 fast {
ebb0:
; asm: xor %eax, %eax
[-,%rax] v0 = iconst.i64 0 ; bin: 31 c0
; asm: xor %edi, %edi
[-,%rdi] v1 = iconst.i64 0 ; bin: 31 ff
; asm: xor %r8d, %r8d
[-,%r8] v2 = iconst.i64 0 ; bin: 45 31 c0
; asm: xor %r15d, %r15d
[-,%r15] v4 = iconst.i64 0 ; bin: 45 31 ff
return v0
}
function %zero_word() -> i16 fast {
ebb0:
; while you may expect this to be encoded like 6631c0, aka
; xor %ax, %ax, the upper 16 bits of the register used for
; i16 are left undefined, so it's not wrong to clear them.
;
; discarding the 66 prefix is shorter, so this test expects
; that we do so.
;
; asm: xor %eax, %eax
[-,%rax] v0 = iconst.i16 0 ; bin: 31 c0
; asm: xor %edi, %edi
[-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff
return v0
}
function %zero_byte() -> i8 fast {
ebb0:
; asm: xor %r15b, %r15b
[-,%r15] v0 = iconst.i8 0 ; bin: 45 30 ff
; asm: xor %al, %al
[-,%rax] v1 = iconst.i8 0 ; bin: 30 c0
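; NOTE(review): without a REX prefix, 8-bit register number 7 is %bh, so `30 ff` does not
; clear %dil (that would need a REX prefix, e.g. `40 30 ff`) — confirm the register class
; permitted for this encoding.
; asm: xor %bh, %bh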
[-,%rdi] v2 = iconst.i8 0 ; bin: 30 ff
return v0
}