Merge pull request #1528 from cfallin/aarch64-bit-ops-fix
arm64: Support less-than-64-bit integers in Bitrev, Clz, Cls, and Popcnt instructions.
This commit is contained in:
@@ -221,7 +221,7 @@ impl From<(Opcode, Type)> for BitOp {
|
|||||||
(Opcode::Clz, I64) => BitOp::Clz64,
|
(Opcode::Clz, I64) => BitOp::Clz64,
|
||||||
(Opcode::Cls, I32) => BitOp::Cls32,
|
(Opcode::Cls, I32) => BitOp::Cls32,
|
||||||
(Opcode::Cls, I64) => BitOp::Cls64,
|
(Opcode::Cls, I64) => BitOp::Cls64,
|
||||||
_ => unreachable!("Called with non-bit op!"),
|
_ => unreachable!("Called with non-bit op!: {:?}", op_ty),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1240,24 +1240,64 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Bitrev | Opcode::Clz | Opcode::Cls => {
|
Opcode::Bitrev | Opcode::Clz | Opcode::Cls | Opcode::Ctz => {
|
||||||
let rd = output_to_reg(ctx, outputs[0]);
|
let rd = output_to_reg(ctx, outputs[0]);
|
||||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
let needs_zext = match op {
|
||||||
let op = BitOp::from((op, ty.unwrap()));
|
Opcode::Bitrev | Opcode::Ctz => false,
|
||||||
ctx.emit(Inst::BitRR { rd, rn, op });
|
Opcode::Clz | Opcode::Cls => true,
|
||||||
}
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
let narrow_mode = if needs_zext && ty_bits(ty) == 64 {
|
||||||
|
NarrowValueMode::ZeroExtend64
|
||||||
|
} else if needs_zext {
|
||||||
|
NarrowValueMode::ZeroExtend32
|
||||||
|
} else {
|
||||||
|
NarrowValueMode::None
|
||||||
|
};
|
||||||
|
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||||
|
let op_ty = match ty {
|
||||||
|
I8 | I16 | I32 => I32,
|
||||||
|
I64 => I64,
|
||||||
|
_ => panic!("Unsupported type for Bitrev/Clz/Cls"),
|
||||||
|
};
|
||||||
|
let bitop = match op {
|
||||||
|
Opcode::Clz | Opcode::Cls | Opcode::Bitrev => BitOp::from((op, op_ty)),
|
||||||
|
Opcode::Ctz => BitOp::from((Opcode::Bitrev, op_ty)),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
ctx.emit(Inst::BitRR { rd, rn, op: bitop });
|
||||||
|
|
||||||
Opcode::Ctz => {
|
// Both bitrev and ctz use a bit-reverse (rbit) instruction; ctz to reduce the problem
|
||||||
let rd = output_to_reg(ctx, outputs[0]);
|
// to a clz, and bitrev as the main operation.
|
||||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
if op == Opcode::Bitrev || op == Opcode::Ctz {
|
||||||
let op = BitOp::from((Opcode::Bitrev, ty.unwrap()));
|
// Reversing an n-bit value (n < 32) with a 32-bit bitrev instruction will place
|
||||||
ctx.emit(Inst::BitRR { rd, rn, op });
|
// the reversed result in the highest n bits, so we need to shift them down into
|
||||||
let op = BitOp::from((Opcode::Clz, ty.unwrap()));
|
// place.
|
||||||
ctx.emit(Inst::BitRR {
|
let right_shift = match ty {
|
||||||
rd,
|
I8 => Some(24),
|
||||||
rn: rd.to_reg(),
|
I16 => Some(16),
|
||||||
op,
|
I32 => None,
|
||||||
});
|
I64 => None,
|
||||||
|
_ => panic!("Unsupported type for Bitrev"),
|
||||||
|
};
|
||||||
|
if let Some(s) = right_shift {
|
||||||
|
ctx.emit(Inst::AluRRImmShift {
|
||||||
|
alu_op: ALUOp::Lsr32,
|
||||||
|
rd,
|
||||||
|
rn: rd.to_reg(),
|
||||||
|
immshift: ImmShift::maybe_from_u64(s).unwrap(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if op == Opcode::Ctz {
|
||||||
|
ctx.emit(Inst::BitRR {
|
||||||
|
op: BitOp::from((Opcode::Clz, op_ty)),
|
||||||
|
rd,
|
||||||
|
rn: rd.to_reg(),
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Popcnt => {
|
Opcode::Popcnt => {
|
||||||
@@ -1272,7 +1312,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
|
|||||||
// x >> 56
|
// x >> 56
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
let rd = output_to_reg(ctx, outputs[0]);
|
let rd = output_to_reg(ctx, outputs[0]);
|
||||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
// FIXME(#1537): zero-extend 8/16/32-bit operands only to 32 bits,
|
||||||
|
// and fix the sequence below to work properly for this.
|
||||||
|
let narrow_mode = NarrowValueMode::ZeroExtend64;
|
||||||
|
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||||
let tmp = ctx.tmp(RegClass::I64, I64);
|
let tmp = ctx.tmp(RegClass::I64, I64);
|
||||||
|
|
||||||
// If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
|
// If this is a 32-bit Popcnt, use Lsr32 to clear the top 32 bits of the register, then
|
||||||
|
|||||||
@@ -1,6 +1,34 @@
|
|||||||
test vcode
|
test vcode
|
||||||
target aarch64
|
target aarch64
|
||||||
|
|
||||||
|
function %a(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = bitrev v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: rbit w0, w0
|
||||||
|
; nextln: lsr w0, w0, #24
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %a(i16) -> i16 {
|
||||||
|
block0(v0: i16):
|
||||||
|
v1 = bitrev v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: rbit w0, w0
|
||||||
|
; nextln: lsr w0, w0, #16
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
function %a(i32) -> i32 {
|
function %a(i32) -> i32 {
|
||||||
block0(v0: i32):
|
block0(v0: i32):
|
||||||
v1 = bitrev v0
|
v1 = bitrev v0
|
||||||
@@ -27,6 +55,35 @@ block0(v0: i64):
|
|||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
function %b(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = clz v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: uxtb w0, w0
|
||||||
|
; nextln: clz w0, w0
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %b(i16) -> i16 {
|
||||||
|
block0(v0: i16):
|
||||||
|
v1 = clz v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: uxth w0, w0
|
||||||
|
; nextln: clz w0, w0
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
function %b(i32) -> i32 {
|
function %b(i32) -> i32 {
|
||||||
block0(v0: i32):
|
block0(v0: i32):
|
||||||
v1 = clz v0
|
v1 = clz v0
|
||||||
@@ -53,6 +110,34 @@ block0(v0: i64):
|
|||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|
||||||
|
function %c(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = cls v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: uxtb w0, w0
|
||||||
|
; nextln: cls w0, w0
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %c(i16) -> i16 {
|
||||||
|
block0(v0: i16):
|
||||||
|
v1 = cls v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: uxth w0, w0
|
||||||
|
; nextln: cls w0, w0
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
function %c(i32) -> i32 {
|
function %c(i32) -> i32 {
|
||||||
block0(v0: i32):
|
block0(v0: i32):
|
||||||
v1 = cls v0
|
v1 = cls v0
|
||||||
@@ -79,6 +164,36 @@ block0(v0: i64):
|
|||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|
||||||
|
function %d(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = ctz v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: rbit w0, w0
|
||||||
|
; nextln: lsr w0, w0, #24
|
||||||
|
; nextln: clz w0, w0
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %d(i16) -> i16 {
|
||||||
|
block0(v0: i16):
|
||||||
|
v1 = ctz v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: rbit w0, w0
|
||||||
|
; nextln: lsr w0, w0, #16
|
||||||
|
; nextln: clz w0, w0
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
function %d(i32) -> i32 {
|
function %d(i32) -> i32 {
|
||||||
block0(v0: i32):
|
block0(v0: i32):
|
||||||
v1 = ctz v0
|
v1 = ctz v0
|
||||||
@@ -140,6 +255,59 @@ block0(v0: i32):
|
|||||||
|
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
|
; nextln: mov w0, w0
|
||||||
|
; nextln: lsr w1, w0, #1
|
||||||
|
; nextln: and x1, x1, #6148914691236517205
|
||||||
|
; nextln: sub x1, x0, x1
|
||||||
|
; nextln: and x0, x1, #3689348814741910323
|
||||||
|
; nextln: lsr x1, x1, #2
|
||||||
|
; nextln: and x1, x1, #3689348814741910323
|
||||||
|
; nextln: add x0, x1, x0
|
||||||
|
; nextln: add x0, x0, x0, LSR 4
|
||||||
|
; nextln: and x0, x0, #1085102592571150095
|
||||||
|
; nextln: add x0, x0, x0, LSL 8
|
||||||
|
; nextln: add x0, x0, x0, LSL 16
|
||||||
|
; nextln: add x0, x0, x0, LSL 32
|
||||||
|
; nextln: lsr x0, x0, #56
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %d(i16) -> i16 {
|
||||||
|
block0(v0: i16):
|
||||||
|
v1 = popcnt v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: uxth x0, w0
|
||||||
|
; nextln: lsr w1, w0, #1
|
||||||
|
; nextln: and x1, x1, #6148914691236517205
|
||||||
|
; nextln: sub x1, x0, x1
|
||||||
|
; nextln: and x0, x1, #3689348814741910323
|
||||||
|
; nextln: lsr x1, x1, #2
|
||||||
|
; nextln: and x1, x1, #3689348814741910323
|
||||||
|
; nextln: add x0, x1, x0
|
||||||
|
; nextln: add x0, x0, x0, LSR 4
|
||||||
|
; nextln: and x0, x0, #1085102592571150095
|
||||||
|
; nextln: add x0, x0, x0, LSL 8
|
||||||
|
; nextln: add x0, x0, x0, LSL 16
|
||||||
|
; nextln: add x0, x0, x0, LSL 32
|
||||||
|
; nextln: lsr x0, x0, #56
|
||||||
|
; nextln: mov sp, fp
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
function %d(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = popcnt v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: uxtb x0, w0
|
||||||
; nextln: lsr w1, w0, #1
|
; nextln: lsr w1, w0, #1
|
||||||
; nextln: and x1, x1, #6148914691236517205
|
; nextln: and x1, x1, #6148914691236517205
|
||||||
; nextln: sub x1, x0, x1
|
; nextln: sub x1, x0, x1
|
||||||
|
|||||||
Reference in New Issue
Block a user