diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle
index d18e8682c3..213307cc43 100644
--- a/cranelift/codegen/src/isa/x64/inst.isle
+++ b/cranelift/codegen/src/isa/x64/inst.isle
@@ -26,6 +26,15 @@
            (src1_dst SyntheticAmode)
            (src2 Gpr))
 
+    ;; Integer arithmetic binary op that relies on the VEX prefix.
+    ;; NOTE: we don't currently support emitting VEX instructions with memory
+    ;; arguments, so `src2` is artificially constrained to be a Gpr.
+    (AluRmRVex (size OperandSize)
+               (op AluRmROpcode)
+               (src1 Gpr)
+               (src2 Gpr)
+               (dst WritableGpr))
+
     ;; Instructions on general-purpose registers that only read src and
     ;; defines dst (dst is not modified). `bsr`, etc.
     (UnaryRmR (size OperandSize) ;; 2, 4, or 8
@@ -586,6 +595,9 @@
            Xor
            Mul))
 
+(type AluRmROpcode extern
+      (enum Andn))
+
 (type UnaryRmROpcode extern
       (enum Bsr
             Bsf
@@ -1837,6 +1849,18 @@
                  src1
                  src2))
 
+;; Helper for emitting `MInst.AluRmRVex` instructions.
+(decl alu_rm_r_vex (Type AluRmROpcode Gpr Gpr) Gpr)
+(rule (alu_rm_r_vex ty opcode src1 src2)
+      (let ((dst WritableGpr (temp_writable_gpr))
+            (size OperandSize (operand_size_of_type_32_64 ty))
+            (_ Unit (emit (MInst.AluRmRVex size opcode src1 src2 dst))))
+        dst))
+
+(decl x64_andn (Type Gpr Gpr) Gpr)
+(rule (x64_andn ty src1 src2)
+      (alu_rm_r_vex ty (AluRmROpcode.Andn) src1 src2))
+
 ;; Helper for emitting immediates with an `i64` value. Note that
 ;; integer constants in ISLE are always parsed as `i128`s; this enables
 ;; negative numbers to be used as immediates.
diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
index 0d202082c7..76787bfb60 100644
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -745,7 +745,7 @@ impl PrettyPrint for RegMem {
     }
 }
 
-/// Some basic ALU operations. TODO: maybe add Adc, Sbb.
+/// Some basic ALU operations.
 #[derive(Copy, Clone, PartialEq)]
 pub enum AluRmiROpcode {
     /// Add operation.
@@ -788,6 +788,36 @@ impl fmt::Display for AluRmiROpcode {
     }
 }
 
+/// ALU operations that don't accept immediates.
+#[derive(Copy, Clone, PartialEq)]
+pub enum AluRmROpcode {
+    /// And with negated second operand.
+    Andn,
+}
+
+impl AluRmROpcode {
+    pub(crate) fn available_from(&self) -> SmallVec<[InstructionSet; 2]> {
+        match self {
+            AluRmROpcode::Andn => smallvec![InstructionSet::BMI1],
+        }
+    }
+}
+
+impl fmt::Debug for AluRmROpcode {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        let name = match self {
+            AluRmROpcode::Andn => "andn",
+        };
+        write!(fmt, "{}", name)
+    }
+}
+
+impl fmt::Display for AluRmROpcode {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        fmt::Debug::fmt(self, f)
+    }
+}
+
 #[derive(Clone, PartialEq)]
 /// Unary operations requiring register or memory and register operands.
 pub enum UnaryRmROpcode {
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index f5b002e450..830565eff6 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -283,6 +283,40 @@ pub(crate) fn emit(
            );
        }
 
+        Inst::AluRmRVex {
+            size,
+            op,
+            dst,
+            src1,
+            src2,
+        } => {
+            use AluRmROpcode::*;
+            let dst = allocs.next(dst.to_reg().to_reg());
+            let src1 = allocs.next(src1.to_reg());
+            let src2 = allocs.next(src2.to_reg());
+
+            let w = match size {
+                OperandSize::Size32 => false,
+                OperandSize::Size64 => true,
+
+                // The other operand sizes are rejected by the ISLE constructors.
+                _ => unreachable!(),
+            };
+
+            let opcode = match op {
+                Andn => 0xf2,
+            };
+
+            VexInstruction::new()
+                .map(OpcodeMap::_0F38)
+                .w(w)
+                .reg(dst.to_real_reg().unwrap().hw_enc())
+                .vvvv(src1.to_real_reg().unwrap().hw_enc())
+                .rm(src2.to_real_reg().unwrap().hw_enc())
+                .opcode(opcode)
+                .encode(sink);
+        }
+
         Inst::UnaryRmR { size, op, src, dst } => {
             let dst = allocs.next(dst.to_reg().to_reg());
             let rex_flags = RexFlags::from(*size);
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index 0d1bf0a0be..518b544cf7 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -124,6 +124,7 @@ impl Inst {
             | Inst::Unwind { .. }
             | Inst::DummyUse { .. } => smallvec![],
 
+            Inst::AluRmRVex { op, .. } => op.available_from(),
             Inst::UnaryRmR { op, .. } => op.available_from(),
 
             // These use dynamic SSE opcodes.
@@ -747,6 +748,25 @@ impl PrettyPrint for Inst {
                     src1_dst,
                 )
             }
+            Inst::AluRmRVex {
+                size,
+                op,
+                src1,
+                src2,
+                dst,
+            } => {
+                let size_bytes = size.to_bytes();
+                let dst = pretty_print_reg(dst.to_reg().to_reg(), size_bytes, allocs);
+                let src1 = pretty_print_reg(src1.to_reg(), size_bytes, allocs);
+                let src2 = pretty_print_reg(src2.to_reg(), size_bytes, allocs);
+                format!(
+                    "{} {}, {}, {}",
+                    ljustify2(op.to_string(), String::new()),
+                    dst,
+                    src1,
+                    src2,
+                )
+            }
             Inst::UnaryRmR { src, dst, op, size } => {
                 let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
                 let src = src.pretty_print(size.to_bytes(), allocs);
@@ -1754,6 +1774,13 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
             collector.reg_use(src2.to_reg());
             src1_dst.get_operands(collector);
         }
+        Inst::AluRmRVex {
+            src1, src2, dst, ..
+        } => {
+            collector.reg_def(dst.to_writable_reg());
+            collector.reg_use(src1.to_reg());
+            collector.reg_use(src2.to_reg());
+        }
         Inst::Not { src, dst, .. } => {
             collector.reg_use(src.to_reg());
             collector.reg_reuse_def(dst.to_writable_reg(), 0);
diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index 9b3553f4ed..0f01e9cc53 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -1103,12 +1103,17 @@
       (sse_and_not ty y x))
 
-(rule 1 (lower (has_type ty (band_not x y)))
+(rule 1 (lower (has_type ty @ (use_bmi1 $false) (band_not x y)))
       (if (ty_int_ref_scalar_64 ty))
      (x64_and ty x (x64_not ty y)))
 
+(rule 1 (lower (has_type ty @ (use_bmi1 $true) (band_not x y)))
+      (if (ty_int_ref_scalar_64 ty))
+      ;; The first argument to `andn` is the one that gets inverted.
+      (x64_andn ty y x))
+
 ;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
diff --git a/cranelift/filetests/filetests/isa/x64/band_not_bmi1.clif b/cranelift/filetests/filetests/isa/x64/band_not_bmi1.clif
new file mode 100644
index 0000000000..6c448f42bb
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/band_not_bmi1.clif
@@ -0,0 +1,17 @@
+test compile precise-output
+target x86_64 has_bmi1
+
+function %f1(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+  v2 = band_not v0, v1
+  return v2
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+;   andn %eax, %esi, %edi
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
diff --git a/cranelift/filetests/filetests/runtests/bnot.clif b/cranelift/filetests/filetests/runtests/bnot.clif
index c29b525e44..19dfaa1bd8 100644
--- a/cranelift/filetests/filetests/runtests/bnot.clif
+++ b/cranelift/filetests/filetests/runtests/bnot.clif
@@ -1,6 +1,7 @@
 test interpret
 test run
 target x86_64
+target x86_64 has_bmi1
 target aarch64
 target s390x
 
@@ -65,4 +66,4 @@ block0(v0: i8, v1: i8):
 ; run: %bxor_not(0xFF, 0) == 0
 ; run: %bxor_not(0x55, 0xFF) == 85
-; run: %bxor_not(0, 0) == -1
\ No newline at end of file
+; run: %bxor_not(0, 0) == -1
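For reference, the operand swap in the new lower.isle rule mirrors the semantics of BMI1 `andn`, which inverts its first source: `dst = !src1 & src2` (encoded above via the 0F38 map and opcode 0xF2, with the VEX W bit selecting 32- vs 64-bit operation). A minimal Rust sketch of that equivalence, for illustration only and not part of the patch (the free-standing function names are hypothetical):

    // BMI1 `andn dst, src1, src2` computes `!src1 & src2`; the first source
    // is the one that gets inverted.
    fn andn(src1: u64, src2: u64) -> u64 {
        !src1 & src2
    }

    // CLIF `band_not x, y` computes `x & !y`, so the lowering emits
    // `(x64_andn ty y x)` with the operands swapped.
    fn band_not(x: u64, y: u64) -> u64 {
        andn(y, x)
    }

    fn main() {
        // 0b1100 & !0b1010 == 0b0100
        assert_eq!(band_not(0b1100, 0b1010), 0b0100);
    }

This is why the `band_not_bmi1.clif` expectation prints `andn %eax, %esi, %edi`: the pretty-printer lists the destination first, followed by the inverted source and then the plain source.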