Use u32 in Type API (#4280)

Move from passing and returning u8 and u16 values to u32 in many of
the functions. This removes a number of type conversions and gives
a small compilation time speedup of roughly 0.7% on my aarch64 machine.

Copyright (c) 2022, Arm Limited.
This commit is contained in:
Sam Parker
2022-06-30 20:43:36 +01:00
committed by GitHub
parent 95836ba114
commit a2d49ebf27
13 changed files with 27 additions and 28 deletions

View File

@@ -99,8 +99,8 @@ impl StackMap {
pub fn get_bit(&self, bit_index: usize) -> bool {
assert!(bit_index < NUM_BITS * self.bitmap.len());
let word_index = bit_index / NUM_BITS;
let word_offset = (bit_index % NUM_BITS) as u8;
self.bitmap[word_index].contains(word_offset)
let word_offset = bit_index % NUM_BITS;
self.bitmap[word_index].contains(word_offset as u32)
}
/// Returns the raw bitmap that represents this stack map.

View File

@@ -37,7 +37,7 @@ where
}
/// Check if this BitSet contains the number num
pub fn contains(&self, num: u8) -> bool {
pub fn contains(&self, num: u32) -> bool {
debug_assert!((num as usize) < Self::bits());
debug_assert!((num as usize) < Self::max_bits());
self.0.into() & (1 << num) != 0

View File

@@ -76,7 +76,7 @@ impl Imm64 {
/// Sign extend this immediate as if it were a signed integer of the given
/// power-of-two width.
pub fn sign_extend_from_width(&mut self, bit_width: u16) {
pub fn sign_extend_from_width(&mut self, bit_width: u32) {
debug_assert!(bit_width.is_power_of_two());
if bit_width >= 64 {

View File

@@ -54,7 +54,7 @@ impl Type {
}
/// Get log_2 of the number of bits in a lane.
pub fn log2_lane_bits(self) -> u8 {
pub fn log2_lane_bits(self) -> u32 {
match self.lane_type() {
B1 => 0,
B8 | I8 => 3,
@@ -67,7 +67,7 @@ impl Type {
}
/// Get the number of bits in a lane.
pub fn lane_bits(self) -> u8 {
pub fn lane_bits(self) -> u32 {
match self.lane_type() {
B1 => 1,
B8 | I8 => 8,
@@ -284,25 +284,25 @@ impl Type {
/// will be a number in the range 0-8.
///
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 0.
pub fn log2_lane_count(self) -> u8 {
self.0.saturating_sub(constants::LANE_BASE) >> 4
pub fn log2_lane_count(self) -> u32 {
(self.0.saturating_sub(constants::LANE_BASE) >> 4) as u32
}
/// Get the number of lanes in this SIMD vector type.
///
/// A scalar type is the same as a SIMD vector type with one lane, so it returns 1.
pub fn lane_count(self) -> u16 {
pub fn lane_count(self) -> u32 {
1 << self.log2_lane_count()
}
/// Get the total number of bits used to represent this type.
pub fn bits(self) -> u16 {
u16::from(self.lane_bits()) * self.lane_count()
pub fn bits(self) -> u32 {
self.lane_bits() * self.lane_count()
}
/// Get the number of bytes used to store this type in memory.
pub fn bytes(self) -> u32 {
(u32::from(self.bits()) + 7) / 8
(self.bits() + 7) / 8
}
/// Get a SIMD vector type with `n` times more lanes than this one.
@@ -311,7 +311,7 @@ impl Type {
///
/// If this is already a SIMD vector type, this produces a SIMD vector type with `n *
/// self.lane_count()` lanes.
pub fn by(self, n: u16) -> Option<Self> {
pub fn by(self, n: u32) -> Option<Self> {
if self.lane_bits() == 0 || !n.is_power_of_two() {
return None;
}

View File

@@ -219,7 +219,8 @@ fn extend_input_to_reg<C: LowerCtx<I = Inst>>(
let ext_mode = match (input_size, requested_size) {
(a, b) if a == b => return put_input_in_reg(ctx, spec),
(1, 8) => return put_input_in_reg(ctx, spec),
(a, b) => ExtMode::new(a, b).unwrap_or_else(|| panic!("invalid extension: {} -> {}", a, b)),
(a, b) => ExtMode::new(a.try_into().unwrap(), b.try_into().unwrap())
.unwrap_or_else(|| panic!("invalid extension: {} -> {}", a, b)),
};
let src = input_to_reg_mem(ctx, spec);

View File

@@ -125,9 +125,7 @@ where
let inputs = self.lower_ctx.get_value_as_source_or_const(val);
if let Some(c) = inputs.constant {
let mask = 1_u64
.checked_shl(ty.bits() as u32)
.map_or(u64::MAX, |x| x - 1);
let mask = 1_u64.checked_shl(ty.bits()).map_or(u64::MAX, |x| x - 1);
return Imm8Gpr::new(Imm8Reg::Imm8 {
imm: (c & mask) as u8,
})
@@ -218,9 +216,7 @@ where
#[inline]
fn const_to_type_masked_imm8(&mut self, c: u64, ty: Type) -> Imm8Gpr {
let mask = 1_u64
.checked_shl(ty.bits() as u32)
.map_or(u64::MAX, |x| x - 1);
let mask = 1_u64.checked_shl(ty.bits()).map_or(u64::MAX, |x| x - 1);
Imm8Gpr::new(Imm8Reg::Imm8 {
imm: (c & mask) as u8,
})

View File

@@ -7,7 +7,7 @@ use std::ops::{Add, BitAnd, Not, Sub};
/// Returns the size (in bits) of a given type.
pub fn ty_bits(ty: Type) -> usize {
usize::from(ty.bits())
ty.bits() as usize
}
/// Is the type represented by an integer (not float) at the machine level?

View File

@@ -208,7 +208,7 @@ macro_rules! isle_prelude_methods {
#[inline]
fn ty_bits_u16(&mut self, ty: Type) -> u16 {
ty.bits()
ty.bits().try_into().unwrap()
}
#[inline]
@@ -410,7 +410,7 @@ macro_rules! isle_prelude_methods {
}
#[inline]
fn multi_lane(&mut self, ty: Type) -> Option<(u8, u16)> {
fn multi_lane(&mut self, ty: Type) -> Option<(u32, u32)> {
if ty.lane_count() > 1 {
Some((ty.lane_bits(), ty.lane_count()))
} else {

View File

@@ -430,7 +430,7 @@
;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given
;; type. Will only match when there is more than one lane.
(decl multi_lane (u8 u16) Type)
(decl multi_lane (u32 u32) Type)
(extern extractor multi_lane multi_lane)
;; Match the instruction that defines the given value, if any.

View File

@@ -922,7 +922,7 @@ mod simplify {
if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
return;
}
let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
let new_type = B8.by(old_cond_type.bytes()).unwrap();
(pos.ins().raw_bitcast(new_type, args[0]), new_type)
}
_ => return,

View File

@@ -535,7 +535,9 @@ fn souper_type_of(dfg: &ir::DataFlowGraph, val: ir::Value) -> Option<ast::Type>
let ty = dfg.value_type(val);
assert!(ty.is_int() || ty.is_bool());
assert_eq!(ty.lane_count(), 1);
Some(ast::Type { width: ty.bits() })
Some(ast::Type {
width: ty.bits().try_into().unwrap(),
})
}
#[derive(Debug)]

View File

@@ -1646,7 +1646,7 @@ impl<'a> Verifier<'a> {
// We must be specific about the opcodes above because other instructions are using
// the same formats.
let ty = self.func.dfg.value_type(arg);
if u16::from(lane) >= ty.lane_count() {
if lane as u32 >= ty.lane_count() {
errors.fatal((
inst,
self.context(inst),

View File

@@ -381,7 +381,7 @@ impl<'a> Lexer<'a> {
};
if is_vector {
if number <= u32::from(u16::MAX) {
base_type.by(number as u16).map(Token::Type)
base_type.by(number).map(Token::Type)
} else {
None
}