Use andn for band_not when bmi1 is present (#5701)
We can use the andn instruction for the lowering of band_not on x64 when bmi1 is available.
This commit is contained in:
@@ -26,6 +26,15 @@
|
||||
(src1_dst SyntheticAmode)
|
||||
(src2 Gpr))
|
||||
|
||||
;; Integer arithmetic binary op that relies on the VEX prefix.
|
||||
;; NOTE: we don't currently support emitting VEX instructions with memory
|
||||
;; arguments, so `src2` is artificially constrained to be a Gpr.
|
||||
(AluRmRVex (size OperandSize)
|
||||
(op AluRmROpcode)
|
||||
(src1 Gpr)
|
||||
(src2 Gpr)
|
||||
(dst WritableGpr))
|
||||
|
||||
;; Instructions on general-purpose registers that only read src and
|
||||
;; define dst (dst is not modified). `bsr`, etc.
|
||||
(UnaryRmR (size OperandSize) ;; 2, 4, or 8
|
||||
@@ -586,6 +595,9 @@
|
||||
Xor
|
||||
Mul))
|
||||
|
||||
(type AluRmROpcode extern
|
||||
(enum Andn))
|
||||
|
||||
(type UnaryRmROpcode extern
|
||||
(enum Bsr
|
||||
Bsf
|
||||
@@ -1837,6 +1849,18 @@
|
||||
src1
|
||||
src2))
|
||||
|
||||
;; Helper for emitting `MInst.AluRmRVex` instructions.
;;
;; Allocates a fresh temporary `WritableGpr` as the destination, derives the
;; operand size from `ty` via `operand_size_of_type_32_64`, emits the
;; instruction with the given opcode and sources, and returns the destination.
|
||||
(decl alu_rm_r_vex (Type AluRmROpcode Gpr Gpr) Gpr)
|
||||
(rule (alu_rm_r_vex ty opcode src1 src2)
|
||||
(let ((dst WritableGpr (temp_writable_gpr))
|
||||
(size OperandSize (operand_size_of_type_32_64 ty))
|
||||
(_ Unit (emit (MInst.AluRmRVex size opcode src1 src2 dst))))
|
||||
dst))
|
||||
|
||||
;; Helper for emitting an `andn` instruction through `alu_rm_r_vex` with the
;; `AluRmROpcode.Andn` opcode.
;;
;; Operand order matters: the `band_not` lowering passes the operand to be
;; inverted as the first source, i.e. the result is `(not src1) and src2`.
(decl x64_andn (Type Gpr Gpr) Gpr)
|
||||
(rule (x64_andn ty src1 src2)
|
||||
(alu_rm_r_vex ty (AluRmROpcode.Andn) src1 src2))
|
||||
|
||||
;; Helper for emitting immediates with an `i64` value. Note that
|
||||
;; integer constants in ISLE are always parsed as `i128`s; this enables
|
||||
;; negative numbers to be used as immediates.
|
||||
|
||||
@@ -745,7 +745,7 @@ impl PrettyPrint for RegMem {
|
||||
}
|
||||
}
|
||||
|
||||
/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
|
||||
/// Some basic ALU operations.
|
||||
#[derive(Copy, Clone, PartialEq)]
|
||||
pub enum AluRmiROpcode {
|
||||
/// Add operation.
|
||||
@@ -788,6 +788,36 @@ impl fmt::Display for AluRmiROpcode {
|
||||
}
|
||||
}
|
||||
|
||||
/// ALU operations that don't accept immediates.
|
||||
#[derive(Copy, Clone, PartialEq)]
|
||||
pub enum AluRmROpcode {
|
||||
/// Bitwise AND with the first source inverted: `dst = !src1 & src2`.
|
||||
Andn,
|
||||
}
|
||||
|
||||
impl AluRmROpcode {
|
||||
/// Returns the instruction set(s) this opcode is available from.
///
/// `Andn` maps to `InstructionSet::BMI1`.
pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
|
||||
match self {
|
||||
AluRmROpcode::Andn => smallvec![InstructionSet::BMI1],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for AluRmROpcode {
|
||||
/// Writes the opcode as its lowercase mnemonic (e.g. `andn`).
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||
// Map each variant to the mnemonic text that is written out.
let name = match self {
|
||||
AluRmROpcode::Andn => "andn",
|
||||
};
|
||||
write!(fmt, "{}", name)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for AluRmROpcode {
|
||||
/// Delegates to the `Debug` implementation, so `Display` and `Debug`
/// produce the same text for an opcode.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fmt::Debug::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq)]
|
||||
/// Unary operations requiring register or memory and register operands.
|
||||
pub enum UnaryRmROpcode {
|
||||
|
||||
@@ -283,6 +283,40 @@ pub(crate) fn emit(
|
||||
);
|
||||
}
|
||||
|
||||
Inst::AluRmRVex {
|
||||
size,
|
||||
op,
|
||||
dst,
|
||||
src1,
|
||||
src2,
|
||||
} => {
|
||||
use AluRmROpcode::*;
|
||||
let dst = allocs.next(dst.to_reg().to_reg());
|
||||
let src1 = allocs.next(src1.to_reg());
|
||||
let src2 = allocs.next(src2.to_reg());
|
||||
|
||||
let w = match size {
|
||||
OperandSize::Size32 => false,
|
||||
OperandSize::Size64 => true,
|
||||
|
||||
// the other cases would be rejected by isle constructors
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let opcode = match op {
|
||||
Andn => 0xf2,
|
||||
};
|
||||
|
||||
VexInstruction::new()
|
||||
.map(OpcodeMap::_0F38)
|
||||
.w(w)
|
||||
.reg(dst.to_real_reg().unwrap().hw_enc())
|
||||
.vvvv(src1.to_real_reg().unwrap().hw_enc())
|
||||
.rm(src2.to_real_reg().unwrap().hw_enc())
|
||||
.opcode(opcode)
|
||||
.encode(sink);
|
||||
}
|
||||
|
||||
Inst::UnaryRmR { size, op, src, dst } => {
|
||||
let dst = allocs.next(dst.to_reg().to_reg());
|
||||
let rex_flags = RexFlags::from(*size);
|
||||
|
||||
@@ -124,6 +124,7 @@ impl Inst {
|
||||
| Inst::Unwind { .. }
|
||||
| Inst::DummyUse { .. } => smallvec![],
|
||||
|
||||
Inst::AluRmRVex { op, .. } => op.available_from(),
|
||||
Inst::UnaryRmR { op, .. } => op.available_from(),
|
||||
|
||||
// These use dynamic SSE opcodes.
|
||||
@@ -747,6 +748,25 @@ impl PrettyPrint for Inst {
|
||||
src1_dst,
|
||||
)
|
||||
}
|
||||
Inst::AluRmRVex {
|
||||
size,
|
||||
op,
|
||||
src1,
|
||||
src2,
|
||||
dst,
|
||||
} => {
|
||||
let size_bytes = size.to_bytes();
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
|
||||
let src1 = pretty_print_reg(src1.to_reg(), size_bytes, allocs);
|
||||
let src2 = pretty_print_reg(src2.to_reg(), size_bytes, allocs);
|
||||
format!(
|
||||
"{} {}, {}, {}",
|
||||
ljustify2(op.to_string(), String::new()),
|
||||
dst,
|
||||
src1,
|
||||
src2,
|
||||
)
|
||||
}
|
||||
Inst::UnaryRmR { src, dst, op, size } => {
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
|
||||
let src = src.pretty_print(size.to_bytes(), allocs);
|
||||
@@ -1754,6 +1774,13 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
collector.reg_use(src2.to_reg());
|
||||
src1_dst.get_operands(collector);
|
||||
}
|
||||
Inst::AluRmRVex {
|
||||
src1, src2, dst, ..
|
||||
} => {
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
collector.reg_use(src1.to_reg());
|
||||
collector.reg_use(src2.to_reg());
|
||||
}
|
||||
Inst::Not { src, dst, .. } => {
|
||||
collector.reg_use(src.to_reg());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
|
||||
@@ -1103,12 +1103,17 @@
|
||||
(sse_and_not ty y x))
|
||||
|
||||
|
||||
(rule 1 (lower (has_type ty (band_not x y)))
|
||||
(rule 1 (lower (has_type ty @ (use_bmi1 $false) (band_not x y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
(x64_and ty
|
||||
x
|
||||
(x64_not ty y)))
|
||||
|
||||
(rule 1 (lower (has_type ty @ (use_bmi1 $true) (band_not x y)))
|
||||
(if (ty_int_ref_scalar_64 ty))
|
||||
;; the first argument is the one that gets inverted with andn
|
||||
(x64_andn ty y x))
|
||||
|
||||
|
||||
;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
17
cranelift/filetests/filetests/isa/x64/band_not_bmi1.clif
Normal file
17
cranelift/filetests/filetests/isa/x64/band_not_bmi1.clif
Normal file
@@ -0,0 +1,17 @@
|
||||
test compile precise-output
|
||||
target x86_64 has_bmi1
|
||||
|
||||
function %f1(i8, i8) -> i8 {
|
||||
block0(v0: i8, v1: i8):
|
||||
v2 = band_not v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; andn %eax, %esi, %edi
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
test interpret
|
||||
test run
|
||||
target x86_64
|
||||
target x86_64 has_bmi1
|
||||
target aarch64
|
||||
target s390x
|
||||
|
||||
@@ -65,4 +66,4 @@ block0(v0: i8, v1: i8):
|
||||
|
||||
; run: %bxor_not(0xFF, 0) == 0
|
||||
; run: %bxor_not(0x55, 0xFF) == 85
|
||||
; run: %bxor_not(0, 0) == -1
|
||||
; run: %bxor_not(0, 0) == -1
|
||||
|
||||
Reference in New Issue
Block a user