Optimize vconst for x86 when immediate contains all zeroes or ones

Instead of using MOVUPS to expensively load bits from memory, this change uses a predicate to optimize vconst without a memory access:
 - when every bit of the 128-bit immediate is zero, use PXOR to zero out an XMM register
 - when every bit of the 128-bit immediate is one, use PCMPEQB to set an XMM register to all ones

Note: this change still leaves the constant data in the constant pool, which may increase code size (TODO).
This commit is contained in:
Andrew Brown
2019-08-28 15:29:40 -07:00
parent 694de912a5
commit 702155b19b
7 changed files with 154 additions and 3 deletions

View File

@@ -31,6 +31,18 @@ pub fn is_zero_32_bit_float<T: Into<ir::immediates::Ieee32>>(x: T) -> bool {
x32.bits() == 0
}
/// Returns `true` when `x` compares equal to sixteen `0x00` bytes, i.e. a 128-bit
/// value in which every bit is cleared.
///
/// `x` may be any value that can be compared against a `&[u8; 16]`.
#[allow(dead_code)]
pub fn is_all_zeroes_128_bit<'b, T: PartialEq<&'b [u8; 16]>>(x: T) -> bool {
    // The array literal is a constant expression, so the borrow is valid for any `'b`.
    let zeroes: &'b [u8; 16] = &[0; 16];
    x == zeroes
}
/// Returns `true` when `x` compares equal to sixteen `0xff` bytes, i.e. a 128-bit
/// value in which every bit is set.
///
/// `x` may be any value that can be compared against a `&[u8; 16]`.
#[allow(dead_code)]
pub fn is_all_ones_128_bit<'b, T: PartialEq<&'b [u8; 16]>>(x: T) -> bool {
    // The array literal is a constant expression, so the borrow is valid for any `'b`.
    let ones: &'b [u8; 16] = &[0xff; 16];
    x == ones
}
/// Check that `x` is the same as `y`.
#[allow(dead_code)]
pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
@@ -109,4 +121,19 @@ mod tests {
assert!(!is_signed_int(x1, 16, 4));
assert!(!is_signed_int(x2, 16, 4));
}
// Exercises `is_all_zeroes_128_bit` with array references and a `Vec`, including
// inputs that differ from all-zero in just one byte.
#[test]
fn is_all_zeroes() {
    assert!(is_all_zeroes_128_bit(&[0; 16]));
    assert!(is_all_zeroes_128_bit(vec![
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    ]));
    assert!(!is_all_zeroes_128_bit(&[1; 16]));

    // A single stray byte at either end must be enough to reject the value;
    // uniform inputs alone would not catch a check that inspects only part of it.
    let mut first_set = [0u8; 16];
    first_set[0] = 1;
    assert!(!is_all_zeroes_128_bit(&first_set));

    let mut last_set = [0u8; 16];
    last_set[15] = 0x80;
    assert!(!is_all_zeroes_128_bit(&last_set));
}
// Exercises `is_all_ones_128_bit` with array references and a `Vec`, including
// inputs that differ from all-ones in just one bit.
#[test]
fn is_all_ones() {
    assert!(!is_all_ones_128_bit(&[0; 16]));
    assert!(is_all_ones_128_bit(&[0xff; 16]));
    assert!(is_all_ones_128_bit(vec![0xffu8; 16]));

    // A single cleared bit at either end must be enough to reject the value;
    // uniform inputs alone would not catch a check that inspects only part of it.
    let mut first_cleared = [0xffu8; 16];
    first_cleared[0] = 0xfe;
    assert!(!is_all_ones_128_bit(&first_cleared));

    let mut last_cleared = [0xffu8; 16];
    last_cleared[15] = 0x7f;
    assert!(!is_all_ones_128_bit(&last_cleared));
}
}