cranelift x64: use the POPCNT instruction for Popcount when it's available
This commit is contained in:
@@ -406,6 +406,8 @@ pub enum UnaryRmROpcode {
|
|||||||
Lzcnt,
|
Lzcnt,
|
||||||
/// Counts trailing zeroes (Trailing Zero CouNT).
|
/// Counts trailing zeroes (Trailing Zero CouNT).
|
||||||
Tzcnt,
|
Tzcnt,
|
||||||
|
/// Counts the number of ones (POPulation CouNT).
|
||||||
|
Popcnt,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for UnaryRmROpcode {
|
impl fmt::Debug for UnaryRmROpcode {
|
||||||
@@ -415,6 +417,7 @@ impl fmt::Debug for UnaryRmROpcode {
|
|||||||
UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
|
UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
|
||||||
UnaryRmROpcode::Lzcnt => write!(fmt, "lzcnt"),
|
UnaryRmROpcode::Lzcnt => write!(fmt, "lzcnt"),
|
||||||
UnaryRmROpcode::Tzcnt => write!(fmt, "tzcnt"),
|
UnaryRmROpcode::Tzcnt => write!(fmt, "tzcnt"),
|
||||||
|
UnaryRmROpcode::Popcnt => write!(fmt, "popcnt"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -677,23 +677,25 @@ pub(crate) fn emit(
|
|||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use UnaryRmROpcode::*;
|
||||||
let prefix = match size {
|
let prefix = match size {
|
||||||
2 => match op {
|
2 => match op {
|
||||||
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => LegacyPrefixes::_66,
|
Bsr | Bsf => LegacyPrefixes::_66,
|
||||||
UnaryRmROpcode::Lzcnt | UnaryRmROpcode::Tzcnt => LegacyPrefixes::_66F3,
|
Lzcnt | Tzcnt | Popcnt => LegacyPrefixes::_66F3,
|
||||||
},
|
},
|
||||||
4 | 8 => match op {
|
4 | 8 => match op {
|
||||||
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => LegacyPrefixes::None,
|
Bsr | Bsf => LegacyPrefixes::None,
|
||||||
UnaryRmROpcode::Lzcnt | UnaryRmROpcode::Tzcnt => LegacyPrefixes::_F3,
|
Lzcnt | Tzcnt | Popcnt => LegacyPrefixes::_F3,
|
||||||
},
|
},
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let (opcode, num_opcodes) = match op {
|
let (opcode, num_opcodes) = match op {
|
||||||
UnaryRmROpcode::Bsr => (0x0fbd, 2),
|
Bsr => (0x0fbd, 2),
|
||||||
UnaryRmROpcode::Bsf => (0x0fbc, 2),
|
Bsf => (0x0fbc, 2),
|
||||||
UnaryRmROpcode::Lzcnt => (0x0fbd, 2),
|
Lzcnt => (0x0fbd, 2),
|
||||||
UnaryRmROpcode::Tzcnt => (0x0fbc, 2),
|
Tzcnt => (0x0fbc, 2),
|
||||||
|
Popcnt => (0x0fb8, 2),
|
||||||
};
|
};
|
||||||
|
|
||||||
match src {
|
match src {
|
||||||
|
|||||||
@@ -2410,15 +2410,69 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Popcnt => {
|
Opcode::Popcnt => {
|
||||||
// TODO when the x86 flags have use_popcnt, we can use the popcnt instruction.
|
|
||||||
|
|
||||||
let (ext_spec, ty) = match ctx.input_ty(insn, 0) {
|
let (ext_spec, ty) = match ctx.input_ty(insn, 0) {
|
||||||
types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32),
|
types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32),
|
||||||
a if a == types::I32 || a == types::I64 => (None, a),
|
a if a == types::I32 || a == types::I64 || a == types::I128 => (None, a),
|
||||||
types::I128 => (None, types::I128),
|
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if isa_flags.use_popcnt() {
|
||||||
|
match ty {
|
||||||
|
types::I32 | types::I64 => {
|
||||||
|
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::unary_rm_r(
|
||||||
|
ty.bytes() as u8,
|
||||||
|
UnaryRmROpcode::Popcnt,
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
types::I128 => {
|
||||||
|
// The number of ones in a 128-bit value is the plain sum of the number of
|
||||||
|
// ones in its low and high parts. No risk of overflow here.
|
||||||
|
let dsts = get_output_reg(ctx, outputs[0]);
|
||||||
|
let dst = dsts.regs()[0];
|
||||||
|
let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
|
||||||
|
let srcs = put_input_in_regs(ctx, inputs[0]);
|
||||||
|
let src_lo = srcs.regs()[0];
|
||||||
|
let src_hi = srcs.regs()[1];
|
||||||
|
|
||||||
|
ctx.emit(Inst::unary_rm_r(
|
||||||
|
8,
|
||||||
|
UnaryRmROpcode::Popcnt,
|
||||||
|
RegMem::reg(src_lo),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::unary_rm_r(
|
||||||
|
8,
|
||||||
|
UnaryRmROpcode::Popcnt,
|
||||||
|
RegMem::reg(src_hi),
|
||||||
|
tmp,
|
||||||
|
));
|
||||||
|
ctx.emit(Inst::alu_rmi_r(
|
||||||
|
true,
|
||||||
|
AluRmiROpcode::Add,
|
||||||
|
RegMemImm::reg(tmp.to_reg()),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
|
||||||
|
// Zero the result's high component.
|
||||||
|
ctx.emit(Inst::alu_rmi_r(
|
||||||
|
true,
|
||||||
|
AluRmiROpcode::Xor,
|
||||||
|
RegMemImm::reg(dsts.regs()[1].to_reg()),
|
||||||
|
dsts.regs()[1],
|
||||||
|
));
|
||||||
|
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let (srcs, ty): (SmallVec<[RegMem; 2]>, Type) = if let Some(ext_spec) = ext_spec {
|
let (srcs, ty): (SmallVec<[RegMem; 2]>, Type) = if let Some(ext_spec) = ext_spec {
|
||||||
(
|
(
|
||||||
smallvec![RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))],
|
smallvec![RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))],
|
||||||
|
|||||||
31
cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
Normal file
31
cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64 has_popcnt has_sse42
|
||||||
|
feature "experimental_x64"
|
||||||
|
|
||||||
|
function %popcnt(i64) -> i64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = popcnt v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; check: movq %rsp, %rbp
|
||||||
|
; check: popcntq %rdi, %rsi
|
||||||
|
; check: movq %rsi, %rax
|
||||||
|
; check: movq %rbp, %rsp
|
||||||
|
; check: popq %rbp
|
||||||
|
; check: ret
|
||||||
|
|
||||||
|
function %popcnt(i32) -> i32 {
|
||||||
|
block0(v0: i32):
|
||||||
|
v1 = popcnt v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; check: movq %rsp, %rbp
|
||||||
|
; check: popcntl %edi, %esi
|
||||||
|
; check: movq %rsi, %rax
|
||||||
|
; check: movq %rbp, %rsp
|
||||||
|
; check: popq %rbp
|
||||||
|
; check: ret
|
||||||
Reference in New Issue
Block a user