cranelift x64: use the TZCNT instruction for Ctz when it's available;
This commit is contained in:
@@ -404,6 +404,8 @@ pub enum UnaryRmROpcode {
|
|||||||
Bsf,
|
Bsf,
|
||||||
/// Counts leading zeroes (Leading Zero CouNT).
|
/// Counts leading zeroes (Leading Zero CouNT).
|
||||||
Lzcnt,
|
Lzcnt,
|
||||||
|
/// Counts trailing zeroes (Trailing Zero CouNT).
|
||||||
|
Tzcnt,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for UnaryRmROpcode {
|
impl fmt::Debug for UnaryRmROpcode {
|
||||||
@@ -412,6 +414,7 @@ impl fmt::Debug for UnaryRmROpcode {
|
|||||||
UnaryRmROpcode::Bsr => write!(fmt, "bsr"),
|
UnaryRmROpcode::Bsr => write!(fmt, "bsr"),
|
||||||
UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
|
UnaryRmROpcode::Bsf => write!(fmt, "bsf"),
|
||||||
UnaryRmROpcode::Lzcnt => write!(fmt, "lzcnt"),
|
UnaryRmROpcode::Lzcnt => write!(fmt, "lzcnt"),
|
||||||
|
UnaryRmROpcode::Tzcnt => write!(fmt, "tzcnt"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -680,11 +680,11 @@ pub(crate) fn emit(
|
|||||||
let prefix = match size {
|
let prefix = match size {
|
||||||
2 => match op {
|
2 => match op {
|
||||||
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => LegacyPrefixes::_66,
|
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => LegacyPrefixes::_66,
|
||||||
UnaryRmROpcode::Lzcnt => LegacyPrefixes::_66F3,
|
UnaryRmROpcode::Lzcnt | UnaryRmROpcode::Tzcnt => LegacyPrefixes::_66F3,
|
||||||
},
|
},
|
||||||
4 | 8 => match op {
|
4 | 8 => match op {
|
||||||
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => LegacyPrefixes::None,
|
UnaryRmROpcode::Bsr | UnaryRmROpcode::Bsf => LegacyPrefixes::None,
|
||||||
UnaryRmROpcode::Lzcnt => LegacyPrefixes::_F3,
|
UnaryRmROpcode::Lzcnt | UnaryRmROpcode::Tzcnt => LegacyPrefixes::_F3,
|
||||||
},
|
},
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
@@ -693,6 +693,7 @@ pub(crate) fn emit(
|
|||||||
UnaryRmROpcode::Bsr => (0x0fbd, 2),
|
UnaryRmROpcode::Bsr => (0x0fbd, 2),
|
||||||
UnaryRmROpcode::Bsf => (0x0fbc, 2),
|
UnaryRmROpcode::Bsf => (0x0fbc, 2),
|
||||||
UnaryRmROpcode::Lzcnt => (0x0fbd, 2),
|
UnaryRmROpcode::Lzcnt => (0x0fbd, 2),
|
||||||
|
UnaryRmROpcode::Tzcnt => (0x0fbc, 2),
|
||||||
};
|
};
|
||||||
|
|
||||||
match src {
|
match src {
|
||||||
|
|||||||
@@ -2346,13 +2346,27 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Ctz => {
|
Opcode::Ctz => {
|
||||||
// TODO when the x86 flags have use_bmi1, we can use TZCNT.
|
let orig_ty = ctx.input_ty(insn, 0);
|
||||||
|
|
||||||
|
if isa_flags.use_bmi1() && (orig_ty == types::I32 || orig_ty == types::I64) {
|
||||||
|
// We can use a plain tzcnt instruction here. Note no special handling is required
|
||||||
|
// for zero inputs, because the machine instruction does what the CLIF expects for
|
||||||
|
// zero, i.e. it returns the operand width in bits (32 or 64).
|
||||||
|
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
ctx.emit(Inst::unary_rm_r(
|
||||||
|
orig_ty.bytes() as u8,
|
||||||
|
UnaryRmROpcode::Tzcnt,
|
||||||
|
src,
|
||||||
|
dst,
|
||||||
|
));
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
// General formula using bit-scan forward (BSF):
|
// General formula using bit-scan forward (BSF):
|
||||||
// bsf %src, %dst
|
// bsf %src, %dst
|
||||||
// mov $(size_bits), %tmp
|
// mov $(size_bits), %tmp
|
||||||
// cmovz %tmp, %dst
|
// cmovz %tmp, %dst
|
||||||
let orig_ty = ctx.input_ty(insn, 0);
|
|
||||||
if orig_ty == types::I128 {
|
if orig_ty == types::I128 {
|
||||||
// ctz src_lo, dst
|
// ctz src_lo, dst
|
||||||
// ctz src_hi, tmp1
|
// ctz src_hi, tmp1
|
||||||
|
|||||||
31
cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
Normal file
31
cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64 has_bmi1
|
||||||
|
feature "experimental_x64"
|
||||||
|
|
||||||
|
function %ctz(i64) -> i64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = ctz v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; check: movq %rsp, %rbp
|
||||||
|
; check: tzcntq %rdi, %rsi
|
||||||
|
; check: movq %rsi, %rax
|
||||||
|
; check: movq %rbp, %rsp
|
||||||
|
; check: popq %rbp
|
||||||
|
; check: ret
|
||||||
|
|
||||||
|
function %ctz(i32) -> i32 {
|
||||||
|
block0(v0: i32):
|
||||||
|
v1 = ctz v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: pushq %rbp
|
||||||
|
; check: movq %rsp, %rbp
|
||||||
|
; check: tzcntl %edi, %esi
|
||||||
|
; check: movq %rsi, %rax
|
||||||
|
; check: movq %rbp, %rsp
|
||||||
|
; check: popq %rbp
|
||||||
|
; check: ret
|
||||||
Reference in New Issue
Block a user