[AArch64] Merge 32- and 64-bit BitOps (#3840)
Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -71,6 +71,7 @@
|
||||
;; A bit op instruction with a single register source.
|
||||
(BitRR
|
||||
(op BitOp)
|
||||
(size OperandSize)
|
||||
(rd WritableReg)
|
||||
(rn Reg))
|
||||
|
||||
@@ -876,13 +877,9 @@
|
||||
(type BitOp
|
||||
(enum
|
||||
;; Bit reverse
|
||||
(RBit32)
|
||||
;; Bit reverse
|
||||
(RBit64)
|
||||
(Clz32)
|
||||
(Clz64)
|
||||
(Cls32)
|
||||
(Cls64)
|
||||
(RBit)
|
||||
(Clz)
|
||||
(Cls)
|
||||
))
|
||||
|
||||
(type AMode extern (enum))
|
||||
@@ -1454,10 +1451,10 @@
|
||||
(writable_reg_to_reg dst)))
|
||||
|
||||
;; Helper for emitting `MInst.BitRR` instructions.
|
||||
(decl bit_rr (BitOp Reg) Reg)
|
||||
(rule (bit_rr op src)
|
||||
(decl bit_rr (BitOp Type Reg) Reg)
|
||||
(rule (bit_rr op ty src)
|
||||
(let ((dst WritableReg (temp_writable_reg $I64))
|
||||
(_ Unit (emit (MInst.BitRR op dst src))))
|
||||
(_ Unit (emit (MInst.BitRR op (operand_size ty) dst src))))
|
||||
(writable_reg_to_reg dst)))
|
||||
|
||||
;; Helper for emitting `adds` instructions.
|
||||
@@ -1822,27 +1819,18 @@
|
||||
|
||||
;; Helpers for generating `rbit` instructions.
|
||||
|
||||
(decl rbit32 (Reg) Reg)
|
||||
(rule (rbit32 x) (bit_rr (BitOp.RBit32) x))
|
||||
|
||||
(decl rbit64 (Reg) Reg)
|
||||
(rule (rbit64 x) (bit_rr (BitOp.RBit64) x))
|
||||
(decl rbit (Type Reg) Reg)
|
||||
(rule (rbit ty x) (bit_rr (BitOp.RBit) ty x))
|
||||
|
||||
;; Helpers for generating `clz` instructions.
|
||||
|
||||
(decl clz32 (Reg) Reg)
|
||||
(rule (clz32 x) (bit_rr (BitOp.Clz32) x))
|
||||
|
||||
(decl clz64 (Reg) Reg)
|
||||
(rule (clz64 x) (bit_rr (BitOp.Clz64) x))
|
||||
(decl a64_clz (Type Reg) Reg)
|
||||
(rule (a64_clz ty x) (bit_rr (BitOp.Clz) ty x))
|
||||
|
||||
;; Helpers for generating `cls` instructions.
|
||||
|
||||
(decl cls32 (Reg) Reg)
|
||||
(rule (cls32 x) (bit_rr (BitOp.Cls32) x))
|
||||
|
||||
(decl cls64 (Reg) Reg)
|
||||
(rule (cls64 x) (bit_rr (BitOp.Cls64) x))
|
||||
(decl a64_cls (Type Reg) Reg)
|
||||
(rule (a64_cls ty x) (bit_rr (BitOp.Cls) ty x))
|
||||
|
||||
;; Helpers for generating `eon` instructions.
|
||||
|
||||
|
||||
@@ -879,14 +879,15 @@ impl MachInstEmit for Inst {
|
||||
sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
|
||||
}
|
||||
|
||||
&Inst::BitRR { op, rd, rn, .. } => {
|
||||
let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
|
||||
&Inst::BitRR {
|
||||
op, size, rd, rn, ..
|
||||
} => {
|
||||
let (op1, op2) = match op {
|
||||
BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
|
||||
BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
|
||||
BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101),
|
||||
BitOp::RBit => (0b00000, 0b000000),
|
||||
BitOp::Clz => (0b00000, 0b000100),
|
||||
BitOp::Cls => (0b00000, 0b000101),
|
||||
};
|
||||
sink.put4(enc_bit_rr(size, op1, op2, rn, rd))
|
||||
sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
|
||||
}
|
||||
|
||||
&Inst::ULoad8 { rd, ref mem, flags }
|
||||
|
||||
@@ -1262,7 +1262,8 @@ fn test_aarch64_binemit() {
|
||||
|
||||
insns.push((
|
||||
Inst::BitRR {
|
||||
op: BitOp::RBit32,
|
||||
op: BitOp::RBit,
|
||||
size: OperandSize::Size32,
|
||||
rd: writable_xreg(1),
|
||||
rn: xreg(10),
|
||||
},
|
||||
@@ -1272,7 +1273,8 @@ fn test_aarch64_binemit() {
|
||||
|
||||
insns.push((
|
||||
Inst::BitRR {
|
||||
op: BitOp::RBit64,
|
||||
op: BitOp::RBit,
|
||||
size: OperandSize::Size64,
|
||||
rd: writable_xreg(1),
|
||||
rn: xreg(10),
|
||||
},
|
||||
@@ -1282,7 +1284,8 @@ fn test_aarch64_binemit() {
|
||||
|
||||
insns.push((
|
||||
Inst::BitRR {
|
||||
op: BitOp::Clz32,
|
||||
op: BitOp::Clz,
|
||||
size: OperandSize::Size32,
|
||||
rd: writable_xreg(15),
|
||||
rn: xreg(3),
|
||||
},
|
||||
@@ -1292,7 +1295,8 @@ fn test_aarch64_binemit() {
|
||||
|
||||
insns.push((
|
||||
Inst::BitRR {
|
||||
op: BitOp::Clz64,
|
||||
op: BitOp::Clz,
|
||||
size: OperandSize::Size64,
|
||||
rd: writable_xreg(15),
|
||||
rn: xreg(3),
|
||||
},
|
||||
@@ -1302,7 +1306,8 @@ fn test_aarch64_binemit() {
|
||||
|
||||
insns.push((
|
||||
Inst::BitRR {
|
||||
op: BitOp::Cls32,
|
||||
op: BitOp::Cls,
|
||||
size: OperandSize::Size32,
|
||||
rd: writable_xreg(21),
|
||||
rn: xreg(16),
|
||||
},
|
||||
@@ -1312,7 +1317,8 @@ fn test_aarch64_binemit() {
|
||||
|
||||
insns.push((
|
||||
Inst::BitRR {
|
||||
op: BitOp::Cls64,
|
||||
op: BitOp::Cls,
|
||||
size: OperandSize::Size64,
|
||||
rd: writable_xreg(21),
|
||||
rn: xreg(16),
|
||||
},
|
||||
|
||||
@@ -58,35 +58,12 @@ pub enum FPUOpRI {
|
||||
}
|
||||
|
||||
impl BitOp {
|
||||
/// What is the opcode's native width?
|
||||
pub fn operand_size(&self) -> OperandSize {
|
||||
match self {
|
||||
BitOp::RBit32 | BitOp::Clz32 | BitOp::Cls32 => OperandSize::Size32,
|
||||
_ => OperandSize::Size64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the assembly mnemonic for this opcode.
|
||||
pub fn op_str(&self) -> &'static str {
|
||||
match self {
|
||||
BitOp::RBit32 | BitOp::RBit64 => "rbit",
|
||||
BitOp::Clz32 | BitOp::Clz64 => "clz",
|
||||
BitOp::Cls32 | BitOp::Cls64 => "cls",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<(Opcode, Type)> for BitOp {
|
||||
/// Get the BitOp from the IR opcode.
|
||||
fn from(op_ty: (Opcode, Type)) -> BitOp {
|
||||
match op_ty {
|
||||
(Opcode::Bitrev, I32) => BitOp::RBit32,
|
||||
(Opcode::Bitrev, I64) => BitOp::RBit64,
|
||||
(Opcode::Clz, I32) => BitOp::Clz32,
|
||||
(Opcode::Clz, I64) => BitOp::Clz64,
|
||||
(Opcode::Cls, I32) => BitOp::Cls32,
|
||||
(Opcode::Cls, I64) => BitOp::Cls64,
|
||||
_ => unreachable!("Called with non-bit op!: {:?}", op_ty),
|
||||
BitOp::RBit => "rbit",
|
||||
BitOp::Clz => "clz",
|
||||
BitOp::Cls => "cls",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2239,8 +2216,7 @@ impl Inst {
|
||||
let extendop = extendop.show_rru(mb_rru);
|
||||
format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop)
|
||||
}
|
||||
&Inst::BitRR { op, rd, rn } => {
|
||||
let size = op.operand_size();
|
||||
&Inst::BitRR { op, size, rd, rn } => {
|
||||
let op = op.op_str();
|
||||
let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_ireg_sized(rn, mb_rru, size);
|
||||
|
||||
@@ -1014,45 +1014,40 @@
|
||||
;; the reversed result in the highest 8 bits, so we need to shift them down into
|
||||
;; place.
|
||||
(rule (lower (has_type $I8 (bitrev x)))
|
||||
(value_reg (lsr_imm $I32 (rbit32 (put_in_reg x)) (imm_shift_from_u8 24))))
|
||||
(value_reg (lsr_imm $I32 (rbit $I32 (put_in_reg x)) (imm_shift_from_u8 24))))
|
||||
|
||||
;; Reversing a 16-bit value with a 32-bit bitrev instruction will place
|
||||
;; the reversed result in the highest 16 bits, so we need to shift them down into
|
||||
;; place.
|
||||
(rule (lower (has_type $I16 (bitrev x)))
|
||||
(value_reg (lsr_imm $I32 (rbit32 (put_in_reg x)) (imm_shift_from_u8 16))))
|
||||
|
||||
(rule (lower (has_type $I32 (bitrev x)))
|
||||
(value_reg (rbit32 (put_in_reg x))))
|
||||
|
||||
(rule (lower (has_type $I64 (bitrev x)))
|
||||
(value_reg (rbit64 (put_in_reg x))))
|
||||
(value_reg (lsr_imm $I32 (rbit $I32 (put_in_reg x)) (imm_shift_from_u8 16))))
|
||||
|
||||
(rule (lower (has_type $I128 (bitrev x)))
|
||||
(let (
|
||||
(val ValueRegs (put_in_regs x))
|
||||
(lo_rev Reg (rbit64 (value_regs_get val 0)))
|
||||
(hi_rev Reg (rbit64 (value_regs_get val 1)))
|
||||
(lo_rev Reg (rbit $I64 (value_regs_get val 0)))
|
||||
(hi_rev Reg (rbit $I64 (value_regs_get val 1)))
|
||||
)
|
||||
(value_regs hi_rev lo_rev)))
|
||||
|
||||
(rule (lower (has_type ty (bitrev x)))
|
||||
(value_reg (rbit ty (put_in_reg x))))
|
||||
|
||||
|
||||
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8 (clz x)))
|
||||
(value_reg (sub_imm $I32 (clz32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
|
||||
(value_reg (sub_imm $I32 (a64_clz $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
|
||||
|
||||
(rule (lower (has_type $I16 (clz x)))
|
||||
(value_reg (sub_imm $I32 (clz32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
|
||||
|
||||
(rule (lower (has_type $I32 (clz x)))
|
||||
(value_reg (clz32 (put_in_reg x))))
|
||||
|
||||
(rule (lower (has_type $I64 (clz x)))
|
||||
(value_reg (clz64 (put_in_reg x))))
|
||||
(value_reg (sub_imm $I32 (a64_clz $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
|
||||
|
||||
(rule (lower (has_type $I128 (clz x)))
|
||||
(lower_clz128 (put_in_regs x)))
|
||||
|
||||
(rule (lower (has_type ty (clz x)))
|
||||
(value_reg (a64_clz ty (put_in_reg x))))
|
||||
|
||||
;; clz hi_clz, hi
|
||||
;; clz lo_clz, lo
|
||||
;; lsr tmp, hi_clz, #6
|
||||
@@ -1061,8 +1056,8 @@
|
||||
(decl lower_clz128 (ValueRegs) ValueRegs)
|
||||
(rule (lower_clz128 val)
|
||||
(let (
|
||||
(hi_clz Reg (clz64 (value_regs_get val 1)))
|
||||
(lo_clz Reg (clz64 (value_regs_get val 0)))
|
||||
(hi_clz Reg (a64_clz $I64 (value_regs_get val 1)))
|
||||
(lo_clz Reg (a64_clz $I64 (value_regs_get val 0)))
|
||||
(tmp Reg (lsr_imm $I64 hi_clz (imm_shift_from_u8 6)))
|
||||
)
|
||||
(value_regs (madd64 lo_clz tmp hi_clz) (imm $I64 0))))
|
||||
@@ -1074,38 +1069,29 @@
|
||||
;; leading zeros of the reversed value.
|
||||
|
||||
(rule (lower (has_type $I8 (ctz x)))
|
||||
(value_reg (clz32 (orr_imm $I32 (rbit32 (put_in_reg x)) (u64_into_imm_logic $I32 0x800000)))))
|
||||
(value_reg (a64_clz $I32 (orr_imm $I32 (rbit $I32 (put_in_reg x)) (u64_into_imm_logic $I32 0x800000)))))
|
||||
|
||||
(rule (lower (has_type $I16 (ctz x)))
|
||||
(value_reg (clz32 (orr_imm $I32 (rbit32 (put_in_reg x)) (u64_into_imm_logic $I32 0x8000)))))
|
||||
|
||||
(rule (lower (has_type $I32 (ctz x)))
|
||||
(value_reg (clz32 (rbit32 (put_in_reg x)))))
|
||||
|
||||
(rule (lower (has_type $I64 (ctz x)))
|
||||
(value_reg (clz64 (rbit64 (put_in_reg x)))))
|
||||
(value_reg (a64_clz $I32 (orr_imm $I32 (rbit $I32 (put_in_reg x)) (u64_into_imm_logic $I32 0x8000)))))
|
||||
|
||||
(rule (lower (has_type $I128 (ctz x)))
|
||||
(let (
|
||||
(val ValueRegs (put_in_regs x))
|
||||
(lo Reg (rbit64 (value_regs_get val 0)))
|
||||
(hi Reg (rbit64 (value_regs_get val 1)))
|
||||
(lo Reg (rbit $I64 (value_regs_get val 0)))
|
||||
(hi Reg (rbit $I64 (value_regs_get val 1)))
|
||||
)
|
||||
(lower_clz128 (value_regs hi lo))))
|
||||
|
||||
(rule (lower (has_type ty (ctz x)))
|
||||
(value_reg (a64_clz ty (rbit ty (put_in_reg x)))))
|
||||
|
||||
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8 (cls x)))
|
||||
(value_reg (sub_imm $I32 (cls32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
|
||||
(value_reg (sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
|
||||
|
||||
(rule (lower (has_type $I16 (cls x)))
|
||||
(value_reg (sub_imm $I32 (cls32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
|
||||
|
||||
(rule (lower (has_type $I32 (cls x)))
|
||||
(value_reg (cls32 (put_in_reg x))))
|
||||
|
||||
(rule (lower (has_type $I64 (cls x)))
|
||||
(value_reg (cls64 (put_in_reg x))))
|
||||
(value_reg (sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
|
||||
|
||||
;; cls lo_cls, lo
|
||||
;; cls hi_cls, hi
|
||||
@@ -1121,8 +1107,8 @@
|
||||
(val ValueRegs (put_in_regs x))
|
||||
(lo Reg (value_regs_get val 0))
|
||||
(hi Reg (value_regs_get val 1))
|
||||
(lo_cls Reg (cls64 lo))
|
||||
(hi_cls Reg (cls64 hi))
|
||||
(lo_cls Reg (a64_cls $I64 lo))
|
||||
(hi_cls Reg (a64_cls $I64 hi))
|
||||
(sign_eq_eon Reg (eon $I64 hi lo))
|
||||
(sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63)))
|
||||
(lo_sign_bits Reg (madd64 lo_cls sign_eq sign_eq))
|
||||
@@ -1133,6 +1119,9 @@
|
||||
)
|
||||
(value_regs (add $I64 maybe_lo hi_cls) (imm $I64 0))))
|
||||
|
||||
(rule (lower (has_type ty (cls x)))
|
||||
(value_reg (a64_cls ty (put_in_reg x))))
|
||||
|
||||
;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; The implementation of `popcnt` for scalar types is done by moving the value
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
src/clif.isle 9ea75a6f790b5c03
|
||||
src/prelude.isle 980b300b3ec3e338
|
||||
src/isa/aarch64/inst.isle a7f3572a5cf2f201
|
||||
src/isa/aarch64/lower.isle 534c135b5f535f33
|
||||
src/isa/aarch64/inst.isle 62ab4218b01cc799
|
||||
src/isa/aarch64/lower.isle 4496f1be20d545
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user