Merge pull request #3314 from dheaton-arm/implement-bitops

Implement bit operations for Cranelift interpreter
2021-09-13 09:29:10 -07:00
parent e85a57d51b e7d570ddd9
commit 9323762d71
11 changed files with 307 additions and 5 deletions
--- a/cranelift/filetests/filetests/runtests/bitrev.clif
+++ b/cranelift/filetests/filetests/runtests/bitrev.clif
@@ -0,0 +1,40 @@
 test interpret
 test run
 target aarch64
 target x86_64
 ; Reverses the bit order of an 8-bit integer.
 ; Expected values are signed decimals: bit 0 moves to bit 7 (1 -> -128),
 ; bit 6 moves to bit 1 (64 -> 2), and the all-ones pattern (-1) maps to itself.
 function %bitrev_i8(i8) -> i8 {
 block0(v0: i8):
    v1 = bitrev v0
    return v1
 }
 ; run: %bitrev_i8(1) == -128
 ; run: %bitrev_i8(64) == 2
 ; run: %bitrev_i8(-1) == -1
 ; Reverses the bit order of a 16-bit integer.
 ; Expected values are signed decimals: bit 0 moves to bit 15 (1 -> -32768),
 ; bit 14 moves to bit 1 (16384 -> 2), and the all-ones pattern (-1) maps to itself.
 function %bitrev_i16(i16) -> i16 {
 block0(v0: i16):
    v1 = bitrev v0
    return v1
 }
 ; run: %bitrev_i16(1) == -32768
 ; run: %bitrev_i16(16384) == 2
 ; run: %bitrev_i16(-1) == -1
 ; Reverses the bit order of a 32-bit integer.
 ; Expected values are signed decimals: bit 0 moves to bit 31 (1 -> -2147483648),
 ; bit 30 moves to bit 1 (1073741824 -> 2), and the all-ones pattern (-1) maps to itself.
 function %bitrev_i32(i32) -> i32 {
 block0(v0: i32):
    v1 = bitrev v0
    return v1
 }
 ; run: %bitrev_i32(1) == -2147483648
 ; run: %bitrev_i32(1073741824) == 2
 ; run: %bitrev_i32(-1) == -1
 ; Reverses the bit order of a 64-bit integer.
 ; Expected values are signed decimals: bit 0 moves to bit 63 (1 -> -9223372036854775808),
 ; bit 62 moves to bit 1 (4611686018427387904 -> 2), and the all-ones pattern (-1) maps to itself.
 function %bitrev_i64(i64) -> i64 {
 block0(v0: i64):
    v1 = bitrev v0
    return v1
 }
 ; run: %bitrev_i64(1) == -9223372036854775808
 ; run: %bitrev_i64(4611686018427387904) == 2
 ; run: %bitrev_i64(-1) == -1
--- a/cranelift/filetests/filetests/runtests/cls-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/cls-aarch64.clif
@@ -0,0 +1,24 @@
 test interpret
 test run
 target aarch64
 ; not implemented on `x86_64`
 ; Counts the leading sign bits of a 32-bit integer: the number of consecutive bits
 ; directly below the sign bit that are equal to it (the sign bit itself is not counted).
 ; 1 has 30 such zero bits, 0x40000000 has none, and both -1 and 0 give 31 because
 ; every bit below the sign bit matches it.
 function %cls_i32(i32) -> i32 {
 block0(v0: i32):
    v1 = cls v0
    return v1
 }
 ; run: %cls_i32(1) == 30
 ; run: %cls_i32(0x40000000) == 0
 ; run: %cls_i32(-1) == 31
 ; run: %cls_i32(0) == 31
 ; Counts the leading sign bits of a 64-bit integer: the number of consecutive bits
 ; directly below the sign bit that are equal to it (the sign bit itself is not counted).
 ; 1 has 62 such zero bits, 0x4000000000000000 has none, and both -1 and 0 give 63
 ; because every bit below the sign bit matches it.
 function %cls_i64(i64) -> i64 {
 block0(v0: i64):
    v1 = cls v0
    return v1
 }
 ; run: %cls_i64(1) == 62
 ; run: %cls_i64(0x4000000000000000) == 0
 ; run: %cls_i64(-1) == 63
 ; run: %cls_i64(0) == 63
--- a/cranelift/filetests/filetests/runtests/cls-interpret.clif
+++ b/cranelift/filetests/filetests/runtests/cls-interpret.clif
@@ -0,0 +1,23 @@
 test interpret
 ; aarch64 yields cls_i8(1) == 30, which is incorrect
 ; Counts the leading sign bits of an 8-bit integer: the number of consecutive bits
 ; directly below the sign bit that are equal to it (the sign bit itself is not counted).
 ; 1 has 6 such zero bits, 0x40 has none, and both -1 and 0 give 7 because every bit
 ; below the sign bit matches it.
 function %cls_i8(i8) -> i8 {
 block0(v0: i8):
    v1 = cls v0
    return v1
 }
 ; run: %cls_i8(1) == 6
 ; run: %cls_i8(0x40) == 0
 ; run: %cls_i8(-1) == 7
 ; run: %cls_i8(0) == 7
 ; Counts the leading sign bits of a 16-bit integer: the number of consecutive bits
 ; directly below the sign bit that are equal to it (the sign bit itself is not counted).
 ; 1 has 14 such zero bits, 0x4000 has none, and both -1 and 0 give 15 because every
 ; bit below the sign bit matches it.
 function %cls_i16(i16) -> i16 {
 block0(v0: i16):
    v1 = cls v0
    return v1
 }
 ; run: %cls_i16(1) == 14
 ; run: %cls_i16(0x4000) == 0
 ; run: %cls_i16(-1) == 15
 ; run: %cls_i16(0) == 15
--- a/cranelift/filetests/filetests/runtests/clz-interpret.clif
+++ b/cranelift/filetests/filetests/runtests/clz-interpret.clif
@@ -0,0 +1,19 @@
 test interpret
 ; Counts the leading zero bits of an 8-bit integer, starting from the most significant bit.
 ; -1 has its top bit set (count 0); a zero input has no set bit to stop at, so the
 ; count is the full width (8).
 function %clz_i8(i8) -> i8 {
 block0(v0: i8):
    v1 = clz v0
    return v1
 }
 ; run: %clz_i8(1) == 7
 ; run: %clz_i8(0x40) == 1
 ; run: %clz_i8(-1) == 0
 ; run: %clz_i8(0) == 8
 ; Counts the leading zero bits of a 16-bit integer, starting from the most significant bit.
 ; -1 has its top bit set (count 0); a zero input has no set bit to stop at, so the
 ; count is the full width (16).
 function %clz_i16(i16) -> i16 {
 block0(v0: i16):
    v1 = clz v0
    return v1
 }
 ; run: %clz_i16(1) == 15
 ; run: %clz_i16(0x4000) == 1
 ; run: %clz_i16(-1) == 0
 ; run: %clz_i16(0) == 16
--- a/cranelift/filetests/filetests/runtests/clz.clif
+++ b/cranelift/filetests/filetests/runtests/clz.clif
@@ -0,0 +1,22 @@
 test interpret
 test run
 target aarch64
 target x86_64
 ; Counts the leading zero bits of a 32-bit integer, starting from the most significant bit.
 ; -1 has its top bit set (count 0); a zero input has no set bit to stop at, so the
 ; count is the full width (32).
 function %clz_i32(i32) -> i32 {
 block0(v0: i32):
    v1 = clz v0
    return v1
 }
 ; run: %clz_i32(1) == 31
 ; run: %clz_i32(0x40000000) == 1
 ; run: %clz_i32(-1) == 0
 ; run: %clz_i32(0) == 32
 ; Counts the leading zero bits of a 64-bit integer, starting from the most significant bit.
 ; -1 has its top bit set (count 0); a zero input has no set bit to stop at, so the
 ; count is the full width (64).
 function %clz_i64(i64) -> i64 {
 block0(v0: i64):
    v1 = clz v0
    return v1
 }
 ; run: %clz_i64(1) == 63
 ; run: %clz_i64(0x4000000000000000) == 1
 ; run: %clz_i64(-1) == 0
 ; run: %clz_i64(0) == 64
--- a/cranelift/filetests/filetests/runtests/ctz-interpret.clif
+++ b/cranelift/filetests/filetests/runtests/ctz-interpret.clif
@@ -0,0 +1,19 @@
 test interpret
 ; Counts the trailing zero bits of an 8-bit integer, starting from the least significant bit.
 ; 1 and -1 both have bit 0 set (count 0); a zero input has no set bit to stop at, so
 ; the count is the full width (8).
 function %ctz_i8(i8) -> i8 {
 block0(v0: i8):
    v1 = ctz v0
    return v1
 }
 ; run: %ctz_i8(1) == 0
 ; run: %ctz_i8(0x40) == 6
 ; run: %ctz_i8(-1) == 0
 ; run: %ctz_i8(0) == 8
 ; Counts the trailing zero bits of a 16-bit integer, starting from the least significant bit.
 ; 1 and -1 both have bit 0 set (count 0); a zero input has no set bit to stop at, so
 ; the count is the full width (16).
 function %ctz_i16(i16) -> i16 {
 block0(v0: i16):
    v1 = ctz v0
    return v1
 }
 ; run: %ctz_i16(1) == 0
 ; run: %ctz_i16(0x4000) == 14
 ; run: %ctz_i16(-1) == 0
 ; run: %ctz_i16(0) == 16
--- a/cranelift/filetests/filetests/runtests/ctz.clif
+++ b/cranelift/filetests/filetests/runtests/ctz.clif
@@ -0,0 +1,22 @@
 test interpret
 test run
 target aarch64
 target x86_64
 ; Counts the trailing zero bits of a 32-bit integer, starting from the least significant bit.
 ; 1 and -1 both have bit 0 set (count 0); a zero input has no set bit to stop at, so
 ; the count is the full width (32).
 function %ctz_i32(i32) -> i32 {
 block0(v0: i32):
    v1 = ctz v0
    return v1
 }
 ; run: %ctz_i32(1) == 0
 ; run: %ctz_i32(0x40000000) == 30
 ; run: %ctz_i32(-1) == 0
 ; run: %ctz_i32(0) == 32
 ; Counts the trailing zero bits of a 64-bit integer, starting from the least significant bit.
 ; 1 and -1 both have bit 0 set (count 0); a zero input has no set bit to stop at, so
 ; the count is the full width (64).
 function %ctz_i64(i64) -> i64 {
 block0(v0: i64):
    v1 = ctz v0
    return v1
 }
 ; run: %ctz_i64(1) == 0
 ; run: %ctz_i64(0x4000000000000000) == 62
 ; run: %ctz_i64(-1) == 0
 ; run: %ctz_i64(0) == 64
--- a/cranelift/filetests/filetests/runtests/popcnt-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/popcnt-aarch64.clif
@@ -0,0 +1,50 @@
 test interpret
 test run
 target aarch64
 ; Counts the bits set to one in an 8-bit integer.
 ; Covers a single low bit, a single high bit, all ones (-1 -> 8) and zero.
 function %popcnt_i8(i8) -> i8 {
 block0(v0: i8):
    v1 = popcnt v0
    return v1
 }
 ; run: %popcnt_i8(1) == 1
 ; run: %popcnt_i8(0x40) == 1
 ; run: %popcnt_i8(-1) == 8
 ; run: %popcnt_i8(0) == 0
 ; Counts the bits set to one in a 16-bit integer.
 ; Covers a single low bit, a single high bit, all ones (-1 -> 16) and zero.
 function %popcnt_i16(i16) -> i16 {
 block0(v0: i16):
    v1 = popcnt v0
    return v1
 }
 ; run: %popcnt_i16(1) == 1
 ; run: %popcnt_i16(0x4000) == 1
 ; run: %popcnt_i16(-1) == 16
 ; run: %popcnt_i16(0) == 0
 ; Counts the bits set to one in a 32-bit integer.
 ; Covers a single low bit, a single high bit, all ones (-1 -> 32) and zero.
 function %popcnt_i32(i32) -> i32 {
 block0(v0: i32):
    v1 = popcnt v0
    return v1
 }
 ; run: %popcnt_i32(1) == 1
 ; run: %popcnt_i32(0x40000000) == 1
 ; run: %popcnt_i32(-1) == 32
 ; run: %popcnt_i32(0) == 0
 ; Counts the bits set to one in a 64-bit integer.
 ; Covers a single low bit, a single high bit, all ones (-1 -> 64) and zero.
 function %popcnt_i64(i64) -> i64 {
 block0(v0: i64):
    v1 = popcnt v0
    return v1
 }
 ; run: %popcnt_i64(1) == 1
 ; run: %popcnt_i64(0x4000000000000000) == 1
 ; run: %popcnt_i64(-1) == 64
 ; run: %popcnt_i64(0) == 0
 ; Counts the bits set to one in each lane of an i8x16 vector; every lane is counted
 ; independently, so the result has one count per input lane.
 function %popcnt_i8x16(i8x16) -> i8x16 {
 block0(v0: i8x16):
    v1 = popcnt v0
    return v1
 }
 ; run: %popcnt_i8x16([1 1 1 1 0x40 0x40 0x40 0x40 0xff 0xff 0xff 0xff 0 0 0 0]) == [1 1 1 1 1 1 1 1 8 8 8 8 0 0 0 0]
--- a/cranelift/filetests/filetests/runtests/popcnt-interpret.clif
+++ b/cranelift/filetests/filetests/runtests/popcnt-interpret.clif
@@ -0,0 +1,24 @@
 test interpret
 ; i16x8 vectors aren't currently supported by the `AArch64` backend.
 ; Counts the bits set to one in each lane of an i16x8 vector; every lane is counted
 ; independently (0xffff -> 16, 0 -> 0).
 function %popcnt_i16x8(i16x8) -> i16x8 {
 block0(v0: i16x8):
    v1 = popcnt v0
    return v1
 }
 ; run: %popcnt_i16x8([1 1 0x4000 0x4000 0xffff 0xffff 0 0]) == [1 1 1 1 16 16 0 0]
 ; Counts the bits set to one in each lane of an i32x4 vector; every lane is counted
 ; independently (0xFFFFFFFF -> 32, 0 -> 0).
 function %popcnt_i32x4(i32x4) -> i32x4 {
 block0(v0: i32x4):
    v1 = popcnt v0
    return v1
 }
 ; run: %popcnt_i32x4([1 0x40000000 0xFFFFFFFF 0]) == [1 1 32 0]
 ; Counts the bits set to one in each lane of an i64x2 vector; every lane is counted
 ; independently. Two runs are needed to cover four inputs with only two lanes.
 function %popcnt_i64x2(i64x2) -> i64x2 {
 block0(v0: i64x2):
    v1 = popcnt v0
    return v1
 }
 ; run: %popcnt_i64x2([1 0x4000000000000000]) == [1 1]
 ; run: %popcnt_i64x2([0xffffffffffffffff 0]) == [64 0]
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -668,11 +668,29 @@ where
        Opcode::IshlImm => binary(Value::shl, arg(0)?, imm_as_ctrl_ty()?)?,
        Opcode::UshrImm => binary(Value::ushr, arg(0)?, imm_as_ctrl_ty()?)?,
        Opcode::SshrImm => binary(Value::ishr, arg(0)?, imm_as_ctrl_ty()?)?,
-        Opcode::Bitrev => unimplemented!("Bitrev"),
-        Opcode::Clz => unimplemented!("Clz"),
-        Opcode::Cls => unimplemented!("Cls"),
-        Opcode::Ctz => unimplemented!("Ctz"),
-        Opcode::Popcnt => unimplemented!("Popcnt"),
+        Opcode::Bitrev => assign(Value::reverse_bits(arg(0)?)?),
+        Opcode::Clz => assign(arg(0)?.leading_zeros()?),
+        Opcode::Cls => {
+            let count = if Value::lt(&arg(0)?, &Value::int(0, ctrl_ty)?)? {
+                arg(0)?.leading_ones()?
            } else {
                arg(0)?.leading_zeros()?
            };
            assign(Value::sub(count, Value::int(1, ctrl_ty)?)?)
        }
        Opcode::Ctz => assign(arg(0)?.trailing_zeros()?),
        Opcode::Popcnt => {
            let count = if arg(0)?.ty().is_int() {
                arg(0)?.count_ones()?
            } else {
                let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?
                    .into_iter()
                    .map(|lane| lane.count_ones())
                    .collect::<ValueResult<SimdVec<V>>>()?;
                vectorizelanes(&lanes, ctrl_ty)?
            };
            assign(count)
        }
        Opcode::Fcmp => assign(Value::bool(
            fcmp(inst.fp_cond_code().unwrap(), &arg(0)?, &arg(1)?)?,
            ctrl_ty.as_bool(),
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -62,6 +62,13 @@ pub trait Value: Clone + From<DataValue> {
    fn or(self, other: Self) -> ValueResult<Self>;
    fn xor(self, other: Self) -> ValueResult<Self>;
    fn not(self) -> ValueResult<Self>;
    // Bit counting.
    /// Returns the number of bits set to one in `self`.
    fn count_ones(self) -> ValueResult<Self>;
    /// Returns the number of consecutive one bits starting at the most significant bit.
    fn leading_ones(self) -> ValueResult<Self>;
    /// Returns the number of consecutive zero bits starting at the most significant bit.
    fn leading_zeros(self) -> ValueResult<Self>;
    /// Returns the number of consecutive zero bits starting at the least significant bit.
    fn trailing_zeros(self) -> ValueResult<Self>;
    /// Returns `self` with the order of its bits reversed.
    fn reverse_bits(self) -> ValueResult<Self>;
 }
 #[derive(Error, Debug, PartialEq)]
@@ -132,6 +139,20 @@ pub enum ValueConversionKind {
 /// Helper for creating match expressions over [DataValue].
 macro_rules! unary_match {
    ( $op:ident($arg1:expr); [ $( $data_value_ty:ident ),* ]; [ $( $return_value_ty:ident ),* ] ) => {
        match $arg1 {
            $( DataValue::$data_value_ty(a) => {
                Ok(DataValue::$data_value_ty($return_value_ty::try_from(a.$op()).unwrap()))
            } )*
            _ => unimplemented!()
        }
    };
    ( $op:ident($arg1:expr); [ $( $data_value_ty:ident ),* ] ) => {
        match $arg1 {
            $( DataValue::$data_value_ty(a) => { Ok(DataValue::$data_value_ty(a.$op())) } )*
            _ => unimplemented!()
        }
    };
    ( $op:tt($arg1:expr); [ $( $data_value_ty:ident ),* ] ) => {
        match $arg1 {
            $( DataValue::$data_value_ty(a) => { Ok(DataValue::$data_value_ty($op a)) } )*
@@ -443,4 +464,24 @@ impl Value for DataValue {
    /// Bitwise NOT: applies `!` to the integer held by an `I8`, `I16`, `I32` or `I64`
    /// value and wraps the result in the same variant.
    fn not(self) -> ValueResult<Self> {
        unary_match!(!(&self); [I8, I16, I32, I64])
    }
    /// Population count: the number of one bits in the wrapped integer.
    ///
    /// The count is converted back into the integer type of the matched variant, so the
    /// result is the same `DataValue` variant as `self`. Variants outside the listed
    /// integer ones reach the macro's `unimplemented!()` fallback and panic.
    fn count_ones(self) -> ValueResult<Self> {
        unary_match!(
            count_ones(&self);
            [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128];
            [i8, i16, i32, i64, i128, u8, u16, u32, u64, u128]
        )
    }
    /// Number of consecutive one bits at the most significant end of the wrapped integer.
    ///
    /// The count is converted back into the integer type of the matched variant, so the
    /// result is the same `DataValue` variant as `self`. Variants outside the listed
    /// integer ones reach the macro's `unimplemented!()` fallback and panic.
    fn leading_ones(self) -> ValueResult<Self> {
        unary_match!(
            leading_ones(&self);
            [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128];
            [i8, i16, i32, i64, i128, u8, u16, u32, u64, u128]
        )
    }
    /// Number of consecutive zero bits at the most significant end of the wrapped integer.
    ///
    /// The count is converted back into the integer type of the matched variant, so the
    /// result is the same `DataValue` variant as `self`. Variants outside the listed
    /// integer ones reach the macro's `unimplemented!()` fallback and panic.
    fn leading_zeros(self) -> ValueResult<Self> {
        unary_match!(
            leading_zeros(&self);
            [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128];
            [i8, i16, i32, i64, i128, u8, u16, u32, u64, u128]
        )
    }
    /// Number of consecutive zero bits at the least significant end of the wrapped integer.
    ///
    /// The count is converted back into the integer type of the matched variant, so the
    /// result is the same `DataValue` variant as `self`. Variants outside the listed
    /// integer ones reach the macro's `unimplemented!()` fallback and panic.
    fn trailing_zeros(self) -> ValueResult<Self> {
        unary_match!(
            trailing_zeros(&self);
            [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128];
            [i8, i16, i32, i64, i128, u8, u16, u32, u64, u128]
        )
    }
    /// Reverses the bit order of the wrapped integer, keeping the same `DataValue` variant.
    ///
    /// No conversion is involved here, so only the variant list is passed to the macro.
    /// Variants outside that list reach the macro's `unimplemented!()` fallback and panic.
    fn reverse_bits(self) -> ValueResult<Self> {
        unary_match!(
            reverse_bits(&self);
            [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]
        )
    }
 }