Optimize vconst for x86 when immediate contains all zeroes or ones
Instead of using MOVUPS to expensively load bits from memory, this change uses a predicate to optimize vconst without a memory access:
- when every bit of the 128-bit immediate is zero, use PXOR to zero out an XMM register
- when every bit of the 128-bit immediate is one, use PCMPEQB to set an XMM register to all ones

This leaves the constant data in the constant pool, which may increase code size (TODO).
This commit is contained in:
@@ -31,6 +31,18 @@ pub fn is_zero_32_bit_float<T: Into<ir::immediates::Ieee32>>(x: T) -> bool {
|
||||
x32.bits() == 0
|
||||
}
|
||||
|
||||
/// Returns `true` when `x` compares equal to sixteen zero bytes, i.e. a 128-bit value whose
/// bits are all clear.
///
/// The comparison is delegated to `T`'s `PartialEq<&[u8; 16]>` implementation, so any type
/// providing that implementation can be passed.
#[allow(dead_code)]
pub fn is_all_zeroes_128_bit<'b, T: PartialEq<&'b [u8; 16]>>(x: T) -> bool {
    // The literal is a constant expression, so the borrow can satisfy the caller-chosen `'b`.
    let all_zeroes: &'b [u8; 16] = &[0x00; 16];
    x == all_zeroes
}
|
||||
|
||||
/// Returns `true` when `x` compares equal to sixteen `0xff` bytes, i.e. a 128-bit value whose
/// bits are all set.
///
/// The comparison is delegated to `T`'s `PartialEq<&[u8; 16]>` implementation, so any type
/// providing that implementation can be passed.
#[allow(dead_code)]
pub fn is_all_ones_128_bit<'b, T: PartialEq<&'b [u8; 16]>>(x: T) -> bool {
    // The literal is a constant expression, so the borrow can satisfy the caller-chosen `'b`.
    let all_ones: &'b [u8; 16] = &[0xff; 16];
    x == all_ones
}
|
||||
|
||||
/// Check that `x` is the same as `y`.
|
||||
#[allow(dead_code)]
|
||||
pub fn is_equal<T: Eq + Copy, O: Into<T> + Copy>(x: T, y: O) -> bool {
|
||||
@@ -109,4 +121,19 @@ mod tests {
|
||||
assert!(!is_signed_int(x1, 16, 4));
|
||||
assert!(!is_signed_int(x2, 16, 4));
|
||||
}
|
||||
|
||||
#[test]
fn is_all_zeroes() {
    // The predicate is exercised with both a borrowed array and an owned vector.
    let zero_array = [0u8; 16];
    let zero_vector = vec![0u8; 16];
    let nonzero_array = [1u8; 16];

    assert!(is_all_zeroes_128_bit(&zero_array));
    assert!(is_all_zeroes_128_bit(zero_vector));
    assert!(!is_all_zeroes_128_bit(&nonzero_array));
}
|
||||
|
||||
#[test]
fn is_all_ones() {
    // One rejecting input (all bits clear) and one accepting input (all bits set).
    let zero_array = [0u8; 16];
    let ones_array = [0xffu8; 16];

    assert!(!is_all_ones_128_bit(&zero_array));
    assert!(is_all_ones_128_bit(&ones_array));
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user