[AArch64] Merge 32- and 64-bit BitOps (#3840)

Copyright (c) 2022, Arm Limited.
This commit is contained in:
Sam Parker
2022-02-23 19:36:23 +00:00
committed by GitHub
parent d307a4ab9a
commit 5b7df72bce
7 changed files with 541 additions and 610 deletions

View File

@@ -71,6 +71,7 @@
;; A bit op instruction with a single register source.
(BitRR
(op BitOp)
(size OperandSize)
(rd WritableReg)
(rn Reg))
@@ -876,13 +877,9 @@
(type BitOp
(enum
;; Bit reverse
(RBit32)
;; Bit reverse
(RBit64)
(Clz32)
(Clz64)
(Cls32)
(Cls64)
(RBit)
(Clz)
(Cls)
))
(type AMode extern (enum))
@@ -1454,10 +1451,10 @@
(writable_reg_to_reg dst)))
;; Helper for emitting `MInst.BitRR` instructions.
(decl bit_rr (BitOp Reg) Reg)
(rule (bit_rr op src)
(decl bit_rr (BitOp Type Reg) Reg)
(rule (bit_rr op ty src)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.BitRR op dst src))))
(_ Unit (emit (MInst.BitRR op (operand_size ty) dst src))))
(writable_reg_to_reg dst)))
;; Helper for emitting `adds` instructions.
@@ -1822,27 +1819,18 @@
;; Helpers for generating `rbit` instructions.
(decl rbit32 (Reg) Reg)
(rule (rbit32 x) (bit_rr (BitOp.RBit32) x))
(decl rbit64 (Reg) Reg)
(rule (rbit64 x) (bit_rr (BitOp.RBit64) x))
(decl rbit (Type Reg) Reg)
(rule (rbit ty x) (bit_rr (BitOp.RBit) ty x))
;; Helpers for generating `clz` instructions.
(decl clz32 (Reg) Reg)
(rule (clz32 x) (bit_rr (BitOp.Clz32) x))
(decl clz64 (Reg) Reg)
(rule (clz64 x) (bit_rr (BitOp.Clz64) x))
(decl a64_clz (Type Reg) Reg)
(rule (a64_clz ty x) (bit_rr (BitOp.Clz) ty x))
;; Helpers for generating `cls` instructions.
(decl cls32 (Reg) Reg)
(rule (cls32 x) (bit_rr (BitOp.Cls32) x))
(decl cls64 (Reg) Reg)
(rule (cls64 x) (bit_rr (BitOp.Cls64) x))
(decl a64_cls (Type Reg) Reg)
(rule (a64_cls ty x) (bit_rr (BitOp.Cls) ty x))
;; Helpers for generating `eon` instructions.

View File

@@ -879,14 +879,15 @@ impl MachInstEmit for Inst {
sink.put4(enc_arith_rrr(top11, bits_15_10, rd, rn, rm));
}
&Inst::BitRR { op, rd, rn, .. } => {
let size = if op.operand_size().is32() { 0b0 } else { 0b1 };
&Inst::BitRR {
op, size, rd, rn, ..
} => {
let (op1, op2) = match op {
BitOp::RBit32 | BitOp::RBit64 => (0b00000, 0b000000),
BitOp::Clz32 | BitOp::Clz64 => (0b00000, 0b000100),
BitOp::Cls32 | BitOp::Cls64 => (0b00000, 0b000101),
BitOp::RBit => (0b00000, 0b000000),
BitOp::Clz => (0b00000, 0b000100),
BitOp::Cls => (0b00000, 0b000101),
};
sink.put4(enc_bit_rr(size, op1, op2, rn, rd))
sink.put4(enc_bit_rr(size.sf_bit(), op1, op2, rn, rd))
}
&Inst::ULoad8 { rd, ref mem, flags }

View File

@@ -1262,7 +1262,8 @@ fn test_aarch64_binemit() {
insns.push((
Inst::BitRR {
op: BitOp::RBit32,
op: BitOp::RBit,
size: OperandSize::Size32,
rd: writable_xreg(1),
rn: xreg(10),
},
@@ -1272,7 +1273,8 @@ fn test_aarch64_binemit() {
insns.push((
Inst::BitRR {
op: BitOp::RBit64,
op: BitOp::RBit,
size: OperandSize::Size64,
rd: writable_xreg(1),
rn: xreg(10),
},
@@ -1282,7 +1284,8 @@ fn test_aarch64_binemit() {
insns.push((
Inst::BitRR {
op: BitOp::Clz32,
op: BitOp::Clz,
size: OperandSize::Size32,
rd: writable_xreg(15),
rn: xreg(3),
},
@@ -1292,7 +1295,8 @@ fn test_aarch64_binemit() {
insns.push((
Inst::BitRR {
op: BitOp::Clz64,
op: BitOp::Clz,
size: OperandSize::Size64,
rd: writable_xreg(15),
rn: xreg(3),
},
@@ -1302,7 +1306,8 @@ fn test_aarch64_binemit() {
insns.push((
Inst::BitRR {
op: BitOp::Cls32,
op: BitOp::Cls,
size: OperandSize::Size32,
rd: writable_xreg(21),
rn: xreg(16),
},
@@ -1312,7 +1317,8 @@ fn test_aarch64_binemit() {
insns.push((
Inst::BitRR {
op: BitOp::Cls64,
op: BitOp::Cls,
size: OperandSize::Size64,
rd: writable_xreg(21),
rn: xreg(16),
},

View File

@@ -58,35 +58,12 @@ pub enum FPUOpRI {
}
impl BitOp {
/// What is the opcode's native width?
pub fn operand_size(&self) -> OperandSize {
match self {
BitOp::RBit32 | BitOp::Clz32 | BitOp::Cls32 => OperandSize::Size32,
_ => OperandSize::Size64,
}
}
/// Get the assembly mnemonic for this opcode.
pub fn op_str(&self) -> &'static str {
match self {
BitOp::RBit32 | BitOp::RBit64 => "rbit",
BitOp::Clz32 | BitOp::Clz64 => "clz",
BitOp::Cls32 | BitOp::Cls64 => "cls",
}
}
}
impl From<(Opcode, Type)> for BitOp {
/// Get the BitOp from the IR opcode.
fn from(op_ty: (Opcode, Type)) -> BitOp {
match op_ty {
(Opcode::Bitrev, I32) => BitOp::RBit32,
(Opcode::Bitrev, I64) => BitOp::RBit64,
(Opcode::Clz, I32) => BitOp::Clz32,
(Opcode::Clz, I64) => BitOp::Clz64,
(Opcode::Cls, I32) => BitOp::Cls32,
(Opcode::Cls, I64) => BitOp::Cls64,
_ => unreachable!("Called with non-bit op!: {:?}", op_ty),
BitOp::RBit => "rbit",
BitOp::Clz => "clz",
BitOp::Cls => "cls",
}
}
}
@@ -2239,8 +2216,7 @@ impl Inst {
let extendop = extendop.show_rru(mb_rru);
format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop)
}
&Inst::BitRR { op, rd, rn } => {
let size = op.operand_size();
&Inst::BitRR { op, size, rd, rn } => {
let op = op.op_str();
let rd = show_ireg_sized(rd.to_reg(), mb_rru, size);
let rn = show_ireg_sized(rn, mb_rru, size);

View File

@@ -1014,45 +1014,40 @@
;; the reversed result in the highest 8 bits, so we need to shift them down into
;; place.
(rule (lower (has_type $I8 (bitrev x)))
(value_reg (lsr_imm $I32 (rbit32 (put_in_reg x)) (imm_shift_from_u8 24))))
(value_reg (lsr_imm $I32 (rbit $I32 (put_in_reg x)) (imm_shift_from_u8 24))))
;; Reversing a 16-bit value with a 32-bit bitrev instruction will place
;; the reversed result in the highest 16 bits, so we need to shift them down into
;; place.
(rule (lower (has_type $I16 (bitrev x)))
(value_reg (lsr_imm $I32 (rbit32 (put_in_reg x)) (imm_shift_from_u8 16))))
(rule (lower (has_type $I32 (bitrev x)))
(value_reg (rbit32 (put_in_reg x))))
(rule (lower (has_type $I64 (bitrev x)))
(value_reg (rbit64 (put_in_reg x))))
(value_reg (lsr_imm $I32 (rbit $I32 (put_in_reg x)) (imm_shift_from_u8 16))))
(rule (lower (has_type $I128 (bitrev x)))
(let (
(val ValueRegs (put_in_regs x))
(lo_rev Reg (rbit64 (value_regs_get val 0)))
(hi_rev Reg (rbit64 (value_regs_get val 1)))
(lo_rev Reg (rbit $I64 (value_regs_get val 0)))
(hi_rev Reg (rbit $I64 (value_regs_get val 1)))
)
(value_regs hi_rev lo_rev)))
(rule (lower (has_type ty (bitrev x)))
(value_reg (rbit ty (put_in_reg x))))
;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (clz x)))
(value_reg (sub_imm $I32 (clz32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
(value_reg (sub_imm $I32 (a64_clz $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
(rule (lower (has_type $I16 (clz x)))
(value_reg (sub_imm $I32 (clz32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
(rule (lower (has_type $I32 (clz x)))
(value_reg (clz32 (put_in_reg x))))
(rule (lower (has_type $I64 (clz x)))
(value_reg (clz64 (put_in_reg x))))
(value_reg (sub_imm $I32 (a64_clz $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
(rule (lower (has_type $I128 (clz x)))
(lower_clz128 (put_in_regs x)))
(rule (lower (has_type ty (clz x)))
(value_reg (a64_clz ty (put_in_reg x))))
;; clz hi_clz, hi
;; clz lo_clz, lo
;; lsr tmp, hi_clz, #6
@@ -1061,8 +1056,8 @@
(decl lower_clz128 (ValueRegs) ValueRegs)
(rule (lower_clz128 val)
(let (
(hi_clz Reg (clz64 (value_regs_get val 1)))
(lo_clz Reg (clz64 (value_regs_get val 0)))
(hi_clz Reg (a64_clz $I64 (value_regs_get val 1)))
(lo_clz Reg (a64_clz $I64 (value_regs_get val 0)))
(tmp Reg (lsr_imm $I64 hi_clz (imm_shift_from_u8 6)))
)
(value_regs (madd64 lo_clz tmp hi_clz) (imm $I64 0))))
@@ -1074,38 +1069,29 @@
;; leading zeros of the reversed value.
(rule (lower (has_type $I8 (ctz x)))
(value_reg (clz32 (orr_imm $I32 (rbit32 (put_in_reg x)) (u64_into_imm_logic $I32 0x800000)))))
(value_reg (a64_clz $I32 (orr_imm $I32 (rbit $I32 (put_in_reg x)) (u64_into_imm_logic $I32 0x800000)))))
(rule (lower (has_type $I16 (ctz x)))
(value_reg (clz32 (orr_imm $I32 (rbit32 (put_in_reg x)) (u64_into_imm_logic $I32 0x8000)))))
(rule (lower (has_type $I32 (ctz x)))
(value_reg (clz32 (rbit32 (put_in_reg x)))))
(rule (lower (has_type $I64 (ctz x)))
(value_reg (clz64 (rbit64 (put_in_reg x)))))
(value_reg (a64_clz $I32 (orr_imm $I32 (rbit $I32 (put_in_reg x)) (u64_into_imm_logic $I32 0x8000)))))
(rule (lower (has_type $I128 (ctz x)))
(let (
(val ValueRegs (put_in_regs x))
(lo Reg (rbit64 (value_regs_get val 0)))
(hi Reg (rbit64 (value_regs_get val 1)))
(lo Reg (rbit $I64 (value_regs_get val 0)))
(hi Reg (rbit $I64 (value_regs_get val 1)))
)
(lower_clz128 (value_regs hi lo))))
(rule (lower (has_type ty (ctz x)))
(value_reg (a64_clz ty (rbit ty (put_in_reg x)))))
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (cls x)))
(value_reg (sub_imm $I32 (cls32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
(value_reg (sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24))))
(rule (lower (has_type $I16 (cls x)))
(value_reg (sub_imm $I32 (cls32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
(rule (lower (has_type $I32 (cls x)))
(value_reg (cls32 (put_in_reg x))))
(rule (lower (has_type $I64 (cls x)))
(value_reg (cls64 (put_in_reg x))))
(value_reg (sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16))))
;; cls lo_cls, lo
;; cls hi_cls, hi
@@ -1121,8 +1107,8 @@
(val ValueRegs (put_in_regs x))
(lo Reg (value_regs_get val 0))
(hi Reg (value_regs_get val 1))
(lo_cls Reg (cls64 lo))
(hi_cls Reg (cls64 hi))
(lo_cls Reg (a64_cls $I64 lo))
(hi_cls Reg (a64_cls $I64 hi))
(sign_eq_eon Reg (eon $I64 hi lo))
(sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63)))
(lo_sign_bits Reg (madd64 lo_cls sign_eq sign_eq))
@@ -1133,6 +1119,9 @@
)
(value_regs (add $I64 maybe_lo hi_cls) (imm $I64 0))))
(rule (lower (has_type ty (cls x)))
(value_reg (a64_cls ty (put_in_reg x))))
;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The implementation of `popcnt` for scalar types is done by moving the value

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 980b300b3ec3e338
src/isa/aarch64/inst.isle a7f3572a5cf2f201
src/isa/aarch64/lower.isle 534c135b5f535f33
src/isa/aarch64/inst.isle 62ab4218b01cc799
src/isa/aarch64/lower.isle 4496f1be20d545

File diff suppressed because it is too large Load Diff