Implement bit operations for Cranelift interpreter
Implemented for the Cranelift interpreter:
- `Bitrev` to reverse the order of the bits in an integer.
- `Cls` to count the leading bits that are the same as the sign bit in an integer (not counting the sign bit itself), yielding one less than the bit width of the integer for 0 and -1.
- `Clz` to count the number of leading zeros in the bitwise representation of the integer.
- `Ctz` to count the number of trailing zeros in the bitwise representation of the integer.
- `Popcnt` to count the number of ones in the bitwise representation of the integer.

Copyright (c) 2021, Arm Limited
This commit is contained in:
40
cranelift/filetests/filetests/runtests/bitrev.clif
Normal file
40
cranelift/filetests/filetests/runtests/bitrev.clif
Normal file
@@ -0,0 +1,40 @@
|
||||
test interpret
|
||||
test run
|
||||
target aarch64
|
||||
target x86_64
|
||||
|
||||
function %bitrev_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = bitrev v0
|
||||
return v1
|
||||
}
|
||||
; run: %bitrev_i8(1) == -128
|
||||
; run: %bitrev_i8(64) == 2
|
||||
; run: %bitrev_i8(-1) == -1
|
||||
|
||||
function %bitrev_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = bitrev v0
|
||||
return v1
|
||||
}
|
||||
; run: %bitrev_i16(1) == -32768
|
||||
; run: %bitrev_i16(16384) == 2
|
||||
; run: %bitrev_i16(-1) == -1
|
||||
|
||||
function %bitrev_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = bitrev v0
|
||||
return v1
|
||||
}
|
||||
; run: %bitrev_i32(1) == -2147483648
|
||||
; run: %bitrev_i32(1073741824) == 2
|
||||
; run: %bitrev_i32(-1) == -1
|
||||
|
||||
function %bitrev_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = bitrev v0
|
||||
return v1
|
||||
}
|
||||
; run: %bitrev_i64(1) == -9223372036854775808
|
||||
; run: %bitrev_i64(4611686018427387904) == 2
|
||||
; run: %bitrev_i64(-1) == -1
|
||||
24
cranelift/filetests/filetests/runtests/cls-aarch64.clif
Normal file
24
cranelift/filetests/filetests/runtests/cls-aarch64.clif
Normal file
@@ -0,0 +1,24 @@
|
||||
test interpret
|
||||
test run
|
||||
target aarch64
|
||||
; not implemented on `x86_64`
|
||||
|
||||
function %cls_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
; run: %cls_i32(1) == 30
|
||||
; run: %cls_i32(0x40000000) == 0
|
||||
; run: %cls_i32(-1) == 31
|
||||
; run: %cls_i32(0) == 31
|
||||
|
||||
function %cls_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
; run: %cls_i64(1) == 62
|
||||
; run: %cls_i64(0x4000000000000000) == 0
|
||||
; run: %cls_i64(-1) == 63
|
||||
; run: %cls_i64(0) == 63
|
||||
23
cranelift/filetests/filetests/runtests/cls-interpret.clif
Normal file
23
cranelift/filetests/filetests/runtests/cls-interpret.clif
Normal file
@@ -0,0 +1,23 @@
|
||||
test interpret
|
||||
; aarch64 yields cls_i8(1) == 30, which is incorrect
|
||||
|
||||
function %cls_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
; run: %cls_i8(1) == 6
|
||||
; run: %cls_i8(0x40) == 0
|
||||
; run: %cls_i8(-1) == 7
|
||||
; run: %cls_i8(0) == 7
|
||||
|
||||
function %cls_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
; run: %cls_i16(1) == 14
|
||||
; run: %cls_i16(0x4000) == 0
|
||||
; run: %cls_i16(-1) == 15
|
||||
; run: %cls_i16(0) == 15
|
||||
|
||||
19
cranelift/filetests/filetests/runtests/clz-interpret.clif
Normal file
19
cranelift/filetests/filetests/runtests/clz-interpret.clif
Normal file
@@ -0,0 +1,19 @@
|
||||
test interpret
|
||||
|
||||
function %clz_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = clz v0
|
||||
return v1
|
||||
}
|
||||
; run: %clz_i8(1) == 7
|
||||
; run: %clz_i8(0x40) == 1
|
||||
; run: %clz_i8(-1) == 0
|
||||
|
||||
function %clz_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = clz v0
|
||||
return v1
|
||||
}
|
||||
; run: %clz_i16(1) == 15
|
||||
; run: %clz_i16(0x4000) == 1
|
||||
; run: %clz_i16(-1) == 0
|
||||
22
cranelift/filetests/filetests/runtests/clz.clif
Normal file
22
cranelift/filetests/filetests/runtests/clz.clif
Normal file
@@ -0,0 +1,22 @@
|
||||
test interpret
|
||||
test run
|
||||
target aarch64
|
||||
target x86_64
|
||||
|
||||
function %clz_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = clz v0
|
||||
return v1
|
||||
}
|
||||
; run: %clz_i32(1) == 31
|
||||
; run: %clz_i32(0x40000000) == 1
|
||||
; run: %clz_i32(-1) == 0
|
||||
|
||||
function %clz_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = clz v0
|
||||
return v1
|
||||
}
|
||||
; run: %clz_i64(1) == 63
|
||||
; run: %clz_i64(0x4000000000000000) == 1
|
||||
; run: %clz_i64(-1) == 0
|
||||
19
cranelift/filetests/filetests/runtests/ctz-interpret.clif
Normal file
19
cranelift/filetests/filetests/runtests/ctz-interpret.clif
Normal file
@@ -0,0 +1,19 @@
|
||||
test interpret
|
||||
|
||||
function %ctz_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = ctz v0
|
||||
return v1
|
||||
}
|
||||
; run: %ctz_i8(1) == 0
|
||||
; run: %ctz_i8(0x40) == 6
|
||||
; run: %ctz_i8(-1) == 0
|
||||
|
||||
function %ctz_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = ctz v0
|
||||
return v1
|
||||
}
|
||||
; run: %ctz_i16(1) == 0
|
||||
; run: %ctz_i16(0x4000) == 14
|
||||
; run: %ctz_i16(-1) == 0
|
||||
22
cranelift/filetests/filetests/runtests/ctz.clif
Normal file
22
cranelift/filetests/filetests/runtests/ctz.clif
Normal file
@@ -0,0 +1,22 @@
|
||||
test interpret
|
||||
test run
|
||||
target aarch64
|
||||
target x86_64
|
||||
|
||||
function %ctz_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = ctz v0
|
||||
return v1
|
||||
}
|
||||
; run: %ctz_i32(1) == 0
|
||||
; run: %ctz_i32(0x40000000) == 30
|
||||
; run: %ctz_i32(-1) == 0
|
||||
|
||||
function %ctz_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = ctz v0
|
||||
return v1
|
||||
}
|
||||
; run: %ctz_i64(1) == 0
|
||||
; run: %ctz_i64(0x4000000000000000) == 62
|
||||
; run: %ctz_i64(-1) == 0
|
||||
50
cranelift/filetests/filetests/runtests/popcnt-aarch64.clif
Normal file
50
cranelift/filetests/filetests/runtests/popcnt-aarch64.clif
Normal file
@@ -0,0 +1,50 @@
|
||||
test interpret
|
||||
test run
|
||||
target aarch64
|
||||
|
||||
function %popcnt_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
; run: %popcnt_i8(1) == 1
|
||||
; run: %popcnt_i8(0x40) == 1
|
||||
; run: %popcnt_i8(-1) == 8
|
||||
; run: %popcnt_i8(0) == 0
|
||||
|
||||
function %popcnt_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
; run: %popcnt_i16(1) == 1
|
||||
; run: %popcnt_i16(0x4000) == 1
|
||||
; run: %popcnt_i16(-1) == 16
|
||||
; run: %popcnt_i16(0) == 0
|
||||
|
||||
function %popcnt_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
; run: %popcnt_i32(1) == 1
|
||||
; run: %popcnt_i32(0x40000000) == 1
|
||||
; run: %popcnt_i32(-1) == 32
|
||||
; run: %popcnt_i32(0) == 0
|
||||
|
||||
function %popcnt_i64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
; run: %popcnt_i64(1) == 1
|
||||
; run: %popcnt_i64(0x4000000000000000) == 1
|
||||
; run: %popcnt_i64(-1) == 64
|
||||
; run: %popcnt_i64(0) == 0
|
||||
|
||||
function %popcnt_i8x16(i8x16) -> i8x16 {
|
||||
block0(v0: i8x16):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
; run: %popcnt_i8x16([1 1 1 1 0x40 0x40 0x40 0x40 0xff 0xff 0xff 0xff 0 0 0 0]) == [1 1 1 1 1 1 1 1 8 8 8 8 0 0 0 0]
|
||||
24
cranelift/filetests/filetests/runtests/popcnt-interpret.clif
Normal file
24
cranelift/filetests/filetests/runtests/popcnt-interpret.clif
Normal file
@@ -0,0 +1,24 @@
|
||||
test interpret
|
||||
; i16x8 vectors aren't currently supported by the `AArch64` backend.
|
||||
|
||||
function %popcnt_i16x8(i16x8) -> i16x8 {
|
||||
block0(v0: i16x8):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
; run: %popcnt_i16x8([1 1 0x4000 0x4000 0xffff 0xffff 0 0]) == [1 1 1 1 16 16 0 0]
|
||||
|
||||
function %popcnt_i32x4(i32x4) -> i32x4 {
|
||||
block0(v0: i32x4):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
; run: %popcnt_i32x4([1 0x40000000 0xFFFFFFFF 0]) == [1 1 32 0]
|
||||
|
||||
function %popcnt_i64x2(i64x2) -> i64x2 {
|
||||
block0(v0: i64x2):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
}
|
||||
; run: %popcnt_i64x2([1 0x4000000000000000]) == [1 1]
|
||||
; run: %popcnt_i64x2([0xffffffffffffffff 0]) == [64 0]
|
||||
@@ -668,11 +668,40 @@ where
|
||||
Opcode::IshlImm => binary(Value::shl, arg(0)?, imm_as_ctrl_ty()?)?,
|
||||
Opcode::UshrImm => binary(Value::ushr, arg(0)?, imm_as_ctrl_ty()?)?,
|
||||
Opcode::SshrImm => binary(Value::ishr, arg(0)?, imm_as_ctrl_ty()?)?,
|
||||
Opcode::Bitrev => unimplemented!("Bitrev"),
|
||||
Opcode::Clz => unimplemented!("Clz"),
|
||||
Opcode::Cls => unimplemented!("Cls"),
|
||||
Opcode::Ctz => unimplemented!("Ctz"),
|
||||
Opcode::Popcnt => unimplemented!("Popcnt"),
|
||||
Opcode::Bitrev => assign(Value::reverse_bits(arg(0)?)?),
|
||||
// For `Clz`, `Cls`, `Ctz`, and `Popcnt`, the underlying Rust function
|
||||
// always returns `u32` (and therefore a `Value` of type `U32`), so this
|
||||
// is switched back to the correct type by recreating the `Value`.
|
||||
Opcode::Clz => assign(Value::int(
|
||||
Value::leading_zeros(arg(0)?)?.into_int()?,
|
||||
ctrl_ty,
|
||||
)?),
|
||||
Opcode::Cls => {
|
||||
let count = if Value::lt(&arg(0)?, &Value::int(0, ctrl_ty)?)? {
|
||||
Value::int(Value::leading_ones(arg(0)?)?.into_int()?, ctrl_ty)?
|
||||
} else {
|
||||
Value::int(Value::leading_zeros(arg(0)?)?.into_int()?, ctrl_ty)?
|
||||
};
|
||||
assign(Value::sub(count, Value::int(1, ctrl_ty)?)?)
|
||||
}
|
||||
Opcode::Ctz => assign(Value::int(
|
||||
Value::trailing_zeros(arg(0)?)?.into_int()?,
|
||||
ctrl_ty,
|
||||
)?),
|
||||
Opcode::Popcnt => {
|
||||
let count = if arg(0)?.ty().is_int() {
|
||||
Value::int(Value::count_ones(arg(0)?)?.into_int()?, ctrl_ty)?
|
||||
} else {
|
||||
let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
|
||||
let mut new_vec = SimdVec::new();
|
||||
for i in lanes {
|
||||
let c: V = Value::count_ones(i)?;
|
||||
new_vec.push(c);
|
||||
}
|
||||
vectorizelanes(&new_vec, ctrl_ty)?
|
||||
};
|
||||
assign(count)
|
||||
}
|
||||
Opcode::Fcmp => assign(Value::bool(
|
||||
fcmp(inst.fp_cond_code().unwrap(), &arg(0)?, &arg(1)?)?,
|
||||
ctrl_ty.as_bool(),
|
||||
|
||||
@@ -62,6 +62,13 @@ pub trait Value: Clone + From<DataValue> {
|
||||
fn or(self, other: Self) -> ValueResult<Self>;
|
||||
fn xor(self, other: Self) -> ValueResult<Self>;
|
||||
fn not(self) -> ValueResult<Self>;
|
||||
|
||||
// Bit counting.
|
||||
fn count_ones(self) -> ValueResult<Self>;
|
||||
fn leading_ones(self) -> ValueResult<Self>;
|
||||
fn leading_zeros(self) -> ValueResult<Self>;
|
||||
fn trailing_zeros(self) -> ValueResult<Self>;
|
||||
fn reverse_bits(self) -> ValueResult<Self>;
|
||||
}
|
||||
|
||||
#[derive(Error, Debug, PartialEq)]
|
||||
@@ -132,6 +139,20 @@ pub enum ValueConversionKind {
|
||||
|
||||
/// Helper for creating match expressions over [DataValue].
|
||||
macro_rules! unary_match {
|
||||
( $op:ident($arg1:expr); [ $( $data_value_ty:ident ),* ]; $return_value_ty:ident ) => {
|
||||
match $arg1 {
|
||||
$( DataValue::$data_value_ty(a) => {
|
||||
Ok(DataValue::$return_value_ty(a.$op()))
|
||||
} )*
|
||||
_ => unimplemented!()
|
||||
}
|
||||
};
|
||||
( $op:ident($arg1:expr); [ $( $data_value_ty:ident ),* ] ) => {
|
||||
match $arg1 {
|
||||
$( DataValue::$data_value_ty(a) => { Ok(DataValue::$data_value_ty(a.$op())) } )*
|
||||
_ => unimplemented!()
|
||||
}
|
||||
};
|
||||
( $op:tt($arg1:expr); [ $( $data_value_ty:ident ),* ] ) => {
|
||||
match $arg1 {
|
||||
$( DataValue::$data_value_ty(a) => { Ok(DataValue::$data_value_ty($op a)) } )*
|
||||
@@ -438,4 +459,24 @@ impl Value for DataValue {
|
||||
fn not(self) -> ValueResult<Self> {
|
||||
unary_match!(!(&self); [I8, I16, I32, I64])
|
||||
}
|
||||
|
||||
/// Counts the one bits of an integer `DataValue`.
///
/// Dispatches through `unary_match!` to the primitive integer's
/// `count_ones` for each listed signed/unsigned variant. The count is always
/// wrapped in `DataValue::U32`, whatever the width of the input variant, so a
/// caller that needs the operand's own type has to convert the result.
///
/// # Panics
///
/// Any variant not in the list falls into the macro's `unimplemented!()` arm.
fn count_ones(self) -> ValueResult<Self> {
|
||||
unary_match!(count_ones(&self); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]; U32)
|
||||
}
|
||||
|
||||
/// Counts the consecutive one bits starting at the most significant bit of an
/// integer `DataValue`.
///
/// Dispatches through `unary_match!` to the primitive integer's
/// `leading_ones` for each listed variant. The count is always wrapped in
/// `DataValue::U32`, not in the input's own variant.
///
/// # Panics
///
/// Any variant not in the list falls into the macro's `unimplemented!()` arm.
fn leading_ones(self) -> ValueResult<Self> {
|
||||
unary_match!(leading_ones(&self); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]; U32)
|
||||
}
|
||||
|
||||
/// Counts the consecutive zero bits starting at the most significant bit of an
/// integer `DataValue`.
///
/// Dispatches through `unary_match!` to the primitive integer's
/// `leading_zeros` for each listed variant. The count is always wrapped in
/// `DataValue::U32`, not in the input's own variant.
///
/// # Panics
///
/// Any variant not in the list falls into the macro's `unimplemented!()` arm.
fn leading_zeros(self) -> ValueResult<Self> {
|
||||
unary_match!(leading_zeros(&self); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]; U32)
|
||||
}
|
||||
|
||||
/// Counts the consecutive zero bits starting at the least significant bit of
/// an integer `DataValue`.
///
/// Dispatches through `unary_match!` to the primitive integer's
/// `trailing_zeros` for each listed variant. The count is always wrapped in
/// `DataValue::U32`, not in the input's own variant.
///
/// # Panics
///
/// Any variant not in the list falls into the macro's `unimplemented!()` arm.
fn trailing_zeros(self) -> ValueResult<Self> {
|
||||
unary_match!(trailing_zeros(&self); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
|
||||
}
|
||||
|
||||
/// Reverses the bit order of an integer `DataValue`.
///
/// Dispatches through `unary_match!` to the primitive integer's
/// `reverse_bits` for each listed variant. No separate return variant is
/// passed to the macro here, so the result is wrapped in the same `DataValue`
/// variant as the input and keeps its width.
///
/// # Panics
///
/// Any variant not in the list falls into the macro's `unimplemented!()` arm.
fn reverse_bits(self) -> ValueResult<Self> {
|
||||
unary_match!(reverse_bits(&self); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user