diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 88a3f1e6f9..71eaae8d96 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -548,9 +548,7 @@ And Or Xor - Mul - And8 - Or8)) + Mul)) (type UnaryRmROpcode extern (enum Bsr @@ -1074,6 +1072,13 @@ (decl cc_invert (CC) CC) (extern constructor cc_invert cc_invert) +(decl floatcc_inverse (FloatCC) FloatCC) +(extern constructor floatcc_inverse floatcc_inverse) + +;; Fails if the argument is not either CC.NZ or CC.Z. +(decl cc_nz_or_z (CC) CC) +(extern extractor cc_nz_or_z cc_nz_or_z) + (type AvxOpcode extern (enum Vfmadd213ps Vfmadd213pd)) @@ -3060,6 +3065,10 @@ (rule (jmp_known target) (SideEffectNoResult.Inst (MInst.JmpKnown target))) +(decl jmp_if (CC MachLabel) ConsumesFlags) +(rule (jmp_if cc taken) + (ConsumesFlags.ConsumesFlagsSideEffect (MInst.JmpIf cc taken))) + ;; Conditional jump based on the condition code. (decl jmp_cond (CC MachLabel MachLabel) ConsumesFlags) (rule (jmp_cond cc taken not_taken) @@ -3070,6 +3079,21 @@ (rule (jmp_cond_icmp (IcmpCondResult.Condition producer cc) taken not_taken) (with_flags_side_effect producer (jmp_cond cc taken not_taken))) +;; Conditional jump based on the result of an fcmp. 
+(decl jmp_cond_fcmp (FcmpCondResult MachLabel MachLabel) SideEffectNoResult) +(rule (jmp_cond_fcmp (FcmpCondResult.Condition producer cc) taken not_taken) + (with_flags_side_effect producer (jmp_cond cc taken not_taken))) +(rule (jmp_cond_fcmp (FcmpCondResult.AndCondition producer cc1 cc2) taken not_taken) + (with_flags_side_effect producer + (consumes_flags_concat + (jmp_if (cc_invert cc1) not_taken) + (jmp_cond (cc_invert cc2) not_taken taken)))) +(rule (jmp_cond_fcmp (FcmpCondResult.OrCondition producer cc1 cc2) taken not_taken) + (with_flags_side_effect producer + (consumes_flags_concat + (jmp_if cc1 taken) + (jmp_cond cc2 taken not_taken)))) + ;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC)))) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index d42398282d..99d88c0cc5 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -701,12 +701,6 @@ pub enum AluRmiROpcode { Xor, /// The signless, non-extending (N x N -> N, for N in {32,64}) variant. Mul, - /// 8-bit form of And. Handled separately as we don't have full 8-bit op - /// support (we just use wider instructions). Used only with some sequences - /// with SETcc. - And8, - /// 8-bit form of Or. - Or8, } impl fmt::Debug for AluRmiROpcode { @@ -720,8 +714,6 @@ impl fmt::Debug for AluRmiROpcode { AluRmiROpcode::Or => "or", AluRmiROpcode::Xor => "xor", AluRmiROpcode::Mul => "imul", - AluRmiROpcode::And8 => "and", - AluRmiROpcode::Or8 => "or", }; write!(fmt, "{}", name) } @@ -733,16 +725,6 @@ impl fmt::Display for AluRmiROpcode { } } -impl AluRmiROpcode { - /// Is this a special-cased 8-bit ALU op? - pub fn is_8bit(self) -> bool { - match self { - AluRmiROpcode::And8 | AluRmiROpcode::Or8 => true, - _ => false, - } - } -} - #[derive(Clone, PartialEq)] pub enum UnaryRmROpcode { /// Bit-scan reverse. 
@@ -1704,32 +1686,6 @@ impl CC { } } - pub(crate) fn from_floatcc(floatcc: FloatCC) -> Self { - match floatcc { - FloatCC::Ordered => CC::NP, - FloatCC::Unordered => CC::P, - // Alias for NE - FloatCC::OrderedNotEqual => CC::NZ, - // Alias for E - FloatCC::UnorderedOrEqual => CC::Z, - // Alias for A - FloatCC::GreaterThan => CC::NBE, - // Alias for AE - FloatCC::GreaterThanOrEqual => CC::NB, - FloatCC::UnorderedOrLessThan => CC::B, - FloatCC::UnorderedOrLessThanOrEqual => CC::BE, - FloatCC::Equal - | FloatCC::NotEqual - | FloatCC::LessThan - | FloatCC::LessThanOrEqual - | FloatCC::UnorderedOrGreaterThan - | FloatCC::UnorderedOrGreaterThanOrEqual => panic!( - "{:?} can't be lowered to a CC code; treat as special case.", - floatcc - ), - } - } - pub(crate) fn get_enc(self) -> u8 { self as u8 } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 03acc88abf..6f542d343d 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -158,7 +158,7 @@ pub(crate) fn emit( (reg_g, src2) }; - let mut rex = RexFlags::from(*size); + let rex = RexFlags::from(*size); if *op == AluRmiROpcode::Mul { // We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so // we have to special-case it. 
@@ -191,26 +191,19 @@ pub(crate) fn emit( } } } else { - let (opcode_r, opcode_m, subopcode_i, is_8bit) = match op { - AluRmiROpcode::Add => (0x01, 0x03, 0, false), - AluRmiROpcode::Adc => (0x11, 0x03, 0, false), - AluRmiROpcode::Sub => (0x29, 0x2B, 5, false), - AluRmiROpcode::Sbb => (0x19, 0x2B, 5, false), - AluRmiROpcode::And => (0x21, 0x23, 4, false), - AluRmiROpcode::Or => (0x09, 0x0B, 1, false), - AluRmiROpcode::Xor => (0x31, 0x33, 6, false), - AluRmiROpcode::And8 => (0x20, 0x22, 4, true), - AluRmiROpcode::Or8 => (0x08, 0x0A, 1, true), + let (opcode_r, opcode_m, subopcode_i) = match op { + AluRmiROpcode::Add => (0x01, 0x03, 0), + AluRmiROpcode::Adc => (0x11, 0x03, 0), + AluRmiROpcode::Sub => (0x29, 0x2B, 5), + AluRmiROpcode::Sbb => (0x19, 0x2B, 5), + AluRmiROpcode::And => (0x21, 0x23, 4), + AluRmiROpcode::Or => (0x09, 0x0B, 1), + AluRmiROpcode::Xor => (0x31, 0x33, 6), AluRmiROpcode::Mul => panic!("unreachable"), }; - assert!(!(is_8bit && *size == OperandSize::Size64)); match src2 { RegMemImm::Reg { reg: reg_e } => { - if is_8bit { - rex.always_emit_if_8bit_needed(reg_e); - rex.always_emit_if_8bit_needed(reg_g); - } // GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R // duality). Do this too, so as to be able to compare generated machine // code easily. @@ -227,9 +220,6 @@ pub(crate) fn emit( RegMemImm::Mem { addr } => { let amode = addr.finalize(state, sink); - if is_8bit { - rex.always_emit_if_8bit_needed(reg_g); - } // Here we revert to the "normal" G-E ordering. emit_std_reg_mem( sink, @@ -245,7 +235,6 @@ pub(crate) fn emit( } RegMemImm::Imm { simm32 } => { - assert!(!is_8bit); let use_imm8 = low8_will_sign_extend_to_32(simm32); let opcode = if use_imm8 { 0x83 } else { 0x81 }; // And also here we use the "normal" G-E ordering. 
diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 6f2cf2c085..952fc1619e 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -1258,86 +1258,6 @@ fn test_x64_emit() { "4C09FA", "orq %rdx, %r15, %rdx", )); - insns.push(( - Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::And8, - RegMemImm::reg(r15), - w_rdx, - ), - "4420FA", - "andb %dl, %r15b, %dl", - )); - insns.push(( - Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::And8, - RegMemImm::reg(rax), - w_rsi, - ), - "4020C6", - "andb %sil, %al, %sil", - )); - insns.push(( - Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::And8, - RegMemImm::reg(rax), - w_rbx, - ), - "20C3", - "andb %bl, %al, %bl", - )); - insns.push(( - Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::And8, - RegMemImm::mem(Amode::imm_reg(0, rax)), - w_rbx, - ), - "2218", - "andb %bl, 0(%rax), %bl", - )); - insns.push(( - Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::Or8, - RegMemImm::reg(r15), - w_rdx, - ), - "4408FA", - "orb %dl, %r15b, %dl", - )); - insns.push(( - Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::Or8, - RegMemImm::reg(rax), - w_rsi, - ), - "4008C6", - "orb %sil, %al, %sil", - )); - insns.push(( - Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::Or8, - RegMemImm::reg(rax), - w_rbx, - ), - "08C3", - "orb %bl, %al, %bl", - )); - insns.push(( - Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::Or8, - RegMemImm::mem(Amode::imm_reg(0, rax)), - w_rbx, - ), - "0A18", - "orb %bl, 0(%rax), %bl", - )); insns.push(( Inst::alu_rmi_r( OperandSize::Size64, diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index eee8bbeddd..dbc58b40e5 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -729,10 +729,6 @@ impl Inst { Inst::JmpKnown { dst } } - 
pub(crate) fn jmp_if(cc: CC, taken: MachLabel) -> Inst { - Inst::JmpIf { cc, taken } - } - pub(crate) fn jmp_cond(cc: CC, taken: MachLabel, not_taken: MachLabel) -> Inst { Inst::JmpCond { cc, @@ -892,23 +888,15 @@ impl PrettyPrint for Inst { .to_string() } - fn suffix_lqb(size: OperandSize, is_8: bool) -> String { - match (size, is_8) { - (_, true) => "b", - (OperandSize::Size32, false) => "l", - (OperandSize::Size64, false) => "q", + fn suffix_lqb(size: OperandSize) -> String { + match size { + OperandSize::Size32 => "l", + OperandSize::Size64 => "q", _ => unreachable!(), } .to_string() } - fn size_lqb(size: OperandSize, is_8: bool) -> u8 { - if is_8 { - return 1; - } - size.to_bytes() - } - fn suffix_bwlq(size: OperandSize) -> String { match size { OperandSize::Size8 => "b".to_string(), @@ -922,11 +910,10 @@ impl PrettyPrint for Inst { Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len), Inst::AluRmiR { size, op, dst, .. } if self.produces_const() => { - let dst = - pretty_print_reg(dst.to_reg().to_reg(), size_lqb(*size, op.is_8bit()), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); format!( "{} {}, {}, {}", - ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), + ljustify2(op.to_string(), suffix_lqb(*size)), dst, dst, dst @@ -939,13 +926,13 @@ impl PrettyPrint for Inst { src2, dst, } => { - let size_bytes = size_lqb(*size, op.is_8bit()); + let size_bytes = size.to_bytes(); let src1 = pretty_print_reg(src1.to_reg(), size_bytes, allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), size_bytes, allocs); let src2 = src2.pretty_print(size_bytes, allocs); format!( "{} {}, {}, {}", - ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), + ljustify2(op.to_string(), suffix_lqb(*size)), src1, src2, dst @@ -957,12 +944,12 @@ impl PrettyPrint for Inst { src1_dst, src2, } => { - let size_bytes = size_lqb(*size, op.is_8bit()); + let size_bytes = size.to_bytes(); let src2 = 
pretty_print_reg(src2.to_reg(), size_bytes, allocs); let src1_dst = src1_dst.pretty_print(size_bytes, allocs); format!( "{} {}, {}", - ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), + ljustify2(op.to_string(), suffix_lqb(*size)), src2, src1_dst, ) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 376746656d..12f7a65d18 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2872,17 +2872,49 @@ (rule (lower_branch (brif cc (ifcmp a b) _ _) (two_targets taken not_taken)) (side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken))) -;; Rules for `brz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Rules for `brff` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower_branch (brff cc (ffcmp a b) _ _) (two_targets taken not_taken)) + (side_effect (jmp_cond_fcmp (emit_fcmp cc a b) taken not_taken))) + +;; Rules for `brz` and `brnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower_branch (brz (icmp cc a b) _ _) (two_targets taken not_taken)) (let ((cmp IcmpCondResult (invert_icmp_cond_result (emit_cmp cc a b)))) (side_effect (jmp_cond_icmp cmp taken not_taken)))) -;; Rules for `brnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(rule (lower_branch (brz (fcmp cc a b) _ _) (two_targets taken not_taken)) + (let ((cmp FcmpCondResult (emit_fcmp (floatcc_inverse cc) a b))) + (side_effect (jmp_cond_fcmp cmp taken not_taken)))) + +(rule (lower_branch (brz val @ (value_type $I128) _ _) (two_targets taken not_taken)) + (side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.NZ) val) taken not_taken))) (rule (lower_branch (brnz (icmp cc a b) _ _) (two_targets taken not_taken)) (side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken))) +(rule (lower_branch (brnz (fcmp cc a b) _ _) (two_targets taken not_taken)) + (let ((cmp FcmpCondResult (emit_fcmp cc a b))) + (side_effect (jmp_cond_fcmp cmp 
taken not_taken)))) + +(rule (lower_branch (brnz val @ (value_type $I128) _ _) (two_targets taken not_taken)) + (side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.Z) val) taken not_taken))) + +;; Compare an I128 value to zero, returning a flags result suitable for making a +;; jump decision. The comparison is implemented as `(hi == 0) && (low == 0)`, +;; and the result can be interpreted as follows +;; * CC.Z indicates that the value was non-zero, as one or both of the halves of +;; the value were non-zero +;; * CC.NZ indicates that both halves of the value were 0 +(decl cmp_zero_i128 (CC ValueRegs) IcmpCondResult) +(rule (cmp_zero_i128 (cc_nz_or_z cc) val) + (let ((lo Gpr (value_regs_get_gpr val 0)) + (hi Gpr (value_regs_get_gpr val 1)) + (lo_z Gpr (with_flags_reg (x64_cmp (OperandSize.Size64) (RegMemImm.Imm 0) lo) + (x64_setcc (CC.Z)))) + (hi_z Gpr (with_flags_reg (x64_cmp (OperandSize.Size64) (RegMemImm.Imm 0) hi) + (x64_setcc (CC.Z))))) + (icmp_cond_result (x64_test (OperandSize.Size8) lo_z hi_z) cc))) ;; Rules for `bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index dc7335fbc3..26f1bf679b 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -5,7 +5,7 @@ pub(super) mod isle; use crate::data_value::DataValue; use crate::ir::{ - condcodes::{CondCode, FloatCC, IntCC}, + condcodes::{FloatCC, IntCC}, types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type, }; use crate::isa::x64::abi::*; @@ -478,100 +478,6 @@ fn emit_cmp>(ctx: &mut C, insn: IRInst, cc: IntCC) -> IntC } } -/// A specification for a fcmp emission. -enum FcmpSpec { - /// Normal flow. - Normal, - - /// Avoid emitting Equal at all costs by inverting it to NotEqual, and indicate when that - /// happens with `InvertedEqualOrConditions`. 
- /// - /// This is useful in contexts where it is hard/inefficient to produce a single instruction (or - /// sequence of instructions) that check for an "AND" combination of condition codes; see for - /// instance lowering of Select. - #[allow(dead_code)] - InvertEqual, -} - -/// This explains how to interpret the results of an fcmp instruction. -enum FcmpCondResult { - /// The given condition code must be set. - Condition(CC), - - /// Both condition codes must be set. - AndConditions(CC, CC), - - /// Either of the conditions codes must be set. - OrConditions(CC, CC), - - /// The associated spec was set to `FcmpSpec::InvertEqual` and Equal has been inverted. Either - /// of the condition codes must be set, and the user must invert meaning of analyzing the - /// condition code results. When the spec is set to `FcmpSpec::Normal`, then this case can't be - /// reached. - InvertedEqualOrConditions(CC, CC), -} - -/// Emits a float comparison instruction. -/// -/// Note: make sure that there are no instructions modifying the flags between a call to this -/// function and the use of the flags! -fn emit_fcmp>( - ctx: &mut C, - insn: IRInst, - mut cond_code: FloatCC, - spec: FcmpSpec, -) -> FcmpCondResult { - let (flip_operands, inverted_equal) = match cond_code { - FloatCC::LessThan - | FloatCC::LessThanOrEqual - | FloatCC::UnorderedOrGreaterThan - | FloatCC::UnorderedOrGreaterThanOrEqual => { - cond_code = cond_code.reverse(); - (true, false) - } - FloatCC::Equal => { - let inverted_equal = match spec { - FcmpSpec::Normal => false, - FcmpSpec::InvertEqual => { - cond_code = FloatCC::NotEqual; // same as .inverse() - true - } - }; - (false, inverted_equal) - } - _ => (false, false), - }; - - // The only valid CC constructed with `from_floatcc` can be put in the flag - // register with a direct float comparison; do this here. 
- let op = match ctx.input_ty(insn, 0) { - types::F32 => SseOpcode::Ucomiss, - types::F64 => SseOpcode::Ucomisd, - _ => panic!("Bad input type to Fcmp"), - }; - - let inputs = &[InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; - let (lhs_input, rhs_input) = if flip_operands { - (inputs[1], inputs[0]) - } else { - (inputs[0], inputs[1]) - }; - let lhs = put_input_in_reg(ctx, lhs_input); - let rhs = input_to_reg_mem(ctx, rhs_input); - ctx.emit(Inst::xmm_cmp_rm_r(op, rhs, lhs)); - - let cond_result = match cond_code { - FloatCC::Equal => FcmpCondResult::AndConditions(CC::NP, CC::Z), - FloatCC::NotEqual if inverted_equal => { - FcmpCondResult::InvertedEqualOrConditions(CC::P, CC::NZ) - } - FloatCC::NotEqual if !inverted_equal => FcmpCondResult::OrConditions(CC::P, CC::NZ), - _ => FcmpCondResult::Condition(CC::from_floatcc(cond_code)), - }; - - cond_result -} - fn emit_vm_call>( ctx: &mut C, flags: &Flags, @@ -2878,61 +2784,10 @@ impl LowerBackend for X64Backend { if let Some(_icmp) = matches_input(ctx, flag_input, Opcode::Icmp) { implemented_in_isle(ctx) - } else if let Some(fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) { - let cond_code = ctx.data(fcmp).fp_cond_code().unwrap(); - let cond_code = if op0 == Opcode::Brz { - cond_code.inverse() - } else { - cond_code - }; - match emit_fcmp(ctx, fcmp, cond_code, FcmpSpec::Normal) { - FcmpCondResult::Condition(cc) => { - ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); - } - FcmpCondResult::AndConditions(cc1, cc2) => { - ctx.emit(Inst::jmp_if(cc1.invert(), not_taken)); - ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken)); - } - FcmpCondResult::OrConditions(cc1, cc2) => { - ctx.emit(Inst::jmp_if(cc1, taken)); - ctx.emit(Inst::jmp_cond(cc2, taken, not_taken)); - } - FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), - } + } else if let Some(_fcmp) = matches_input(ctx, flag_input, Opcode::Fcmp) { + implemented_in_isle(ctx) } else if src_ty == types::I128 { - let src = put_input_in_regs( 
- ctx, - InsnInput { - insn: branches[0], - input: 0, - }, - ); - let (half_cc, comb_op) = match op0 { - Opcode::Brz => (CC::Z, AluRmiROpcode::And8), - Opcode::Brnz => (CC::NZ, AluRmiROpcode::Or8), - _ => unreachable!(), - }; - let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - ctx.emit(Inst::cmp_rmi_r( - OperandSize::Size64, - RegMemImm::imm(0), - src.regs()[0], - )); - ctx.emit(Inst::setcc(half_cc, tmp1)); - ctx.emit(Inst::cmp_rmi_r( - OperandSize::Size64, - RegMemImm::imm(0), - src.regs()[1], - )); - ctx.emit(Inst::setcc(half_cc, tmp2)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - comb_op, - RegMemImm::reg(tmp1.to_reg()), - tmp2, - )); - ctx.emit(Inst::jmp_cond(CC::NZ, taken, not_taken)); + implemented_in_isle(ctx); } else if is_int_or_ref_ty(src_ty) || is_bool_ty(src_ty) { let src = put_input_in_reg( ctx, @@ -2968,34 +2823,7 @@ impl LowerBackend for X64Backend { } } - Opcode::BrIcmp | Opcode::Brif => implemented_in_isle(ctx), - Opcode::Brff => { - let flag_input = InsnInput { - insn: branches[0], - input: 0, - }; - - if let Some(ffcmp) = matches_input(ctx, flag_input, Opcode::Ffcmp) { - let cond_code = ctx.data(branches[0]).fp_cond_code().unwrap(); - match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) { - FcmpCondResult::Condition(cc) => { - ctx.emit(Inst::jmp_cond(cc, taken, not_taken)); - } - FcmpCondResult::AndConditions(cc1, cc2) => { - ctx.emit(Inst::jmp_if(cc1.invert(), not_taken)); - ctx.emit(Inst::jmp_cond(cc2.invert(), not_taken, taken)); - } - FcmpCondResult::OrConditions(cc1, cc2) => { - ctx.emit(Inst::jmp_if(cc1, taken)); - ctx.emit(Inst::jmp_cond(cc2, taken, not_taken)); - } - FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), - } - } else { - // Should be disallowed by flags checks in verifier. 
- unimplemented!("Brff with input not from ffcmp"); - } - } + Opcode::BrIcmp | Opcode::Brif | Opcode::Brff => implemented_in_isle(ctx), _ => panic!("unexpected branch opcode: {:?}", op0), } diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 7eae08707e..07f4c2e75d 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -14,7 +14,7 @@ use crate::ir::LibCall; use crate::isa::x64::lower::emit_vm_call; use crate::{ ir::{ - condcodes::{FloatCC, IntCC}, + condcodes::{CondCode, FloatCC, IntCC}, immediates::*, types::*, Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueList, @@ -590,6 +590,20 @@ where cc.invert() } + #[inline] + fn cc_nz_or_z(&mut self, cc: &CC) -> Option<CC> { + match cc { + CC::Z => Some(*cc), + CC::NZ => Some(*cc), + _ => None, + } + } + + #[inline] + fn floatcc_inverse(&mut self, cc: &FloatCC) -> FloatCC { + cc.inverse() + } + #[inline] fn sum_extend_fits_in_32_bits( &mut self, diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 661ecb9fed..70d1d0de86 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -553,7 +553,10 @@ (type SideEffectNoResult (enum (Inst (inst MInst)) (Inst2 (inst1 MInst) - (inst2 MInst)))) + (inst2 MInst)) + (Inst3 (inst1 MInst) + (inst2 MInst) + (inst3 MInst)))) ;; Create an empty `InstOutput`, but do emit the given side-effectful ;; instruction.
@@ -565,10 +568,19 @@ (let ((_ Unit (emit inst1)) (_ Unit (emit inst2))) (output_none))) +(rule (side_effect (SideEffectNoResult.Inst3 inst1 inst2 inst3)) + (let ((_ Unit (emit inst1)) + (_ Unit (emit inst2)) + (_ Unit (emit inst3))) + (output_none))) (decl side_effect_concat (SideEffectNoResult SideEffectNoResult) SideEffectNoResult) (rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst inst2)) (SideEffectNoResult.Inst2 inst1 inst2)) +(rule (side_effect_concat (SideEffectNoResult.Inst inst1) (SideEffectNoResult.Inst2 inst2 inst3)) + (SideEffectNoResult.Inst3 inst1 inst2 inst3)) +(rule (side_effect_concat (SideEffectNoResult.Inst2 inst1 inst2) (SideEffectNoResult.Inst inst3)) + (SideEffectNoResult.Inst3 inst1 inst2 inst3)) ;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -595,6 +607,7 @@ ;; ProducesFlags. See `with_flags` below for more. (type ConsumesFlags (enum (ConsumesFlagsSideEffect (inst MInst)) + (ConsumesFlagsSideEffect2 (inst1 MInst) (inst2 MInst)) (ConsumesFlagsReturnsResultWithProducer (inst MInst) (result Reg)) (ConsumesFlagsReturnsReg (inst MInst) (result Reg)) (ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst) @@ -630,6 +643,10 @@ inst1 inst2 (value_regs reg1 reg2))) +(rule (consumes_flags_concat + (ConsumesFlags.ConsumesFlagsSideEffect inst1) + (ConsumesFlags.ConsumesFlagsSideEffect inst2)) + (ConsumesFlags.ConsumesFlagsSideEffect2 inst1 inst2)) ;; Combine flags-producing and -consuming instructions together, ensuring that ;; they are emitted back-to-back and no other instructions can be emitted @@ -707,11 +724,21 @@ (ConsumesFlags.ConsumesFlagsSideEffect c)) (SideEffectNoResult.Inst c)) +(rule (with_flags_side_effect + (ProducesFlags.AlreadyExistingFlags) + (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) + (SideEffectNoResult.Inst2 c1 c2)) + (rule (with_flags_side_effect (ProducesFlags.ProducesFlagsSideEffect p) (ConsumesFlags.ConsumesFlagsSideEffect c)) (SideEffectNoResult.Inst2 p c)) 
+(rule (with_flags_side_effect + (ProducesFlags.ProducesFlagsSideEffect p) + (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2)) + (SideEffectNoResult.Inst3 p c1 c2)) + ;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl trap_code_division_by_zero () TrapCode) diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index 0a6dbfacc8..231dfdca0d 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -126,3 +126,62 @@ block2: ; popq %rbp ; ret +function %f4(f32, f32) -> b1 { +block0(v0: f32, v1: f32): + v2 = fcmp eq v0, v1 + brz v2, block1 + jump block2 +block1: + v3 = bconst.b1 true + return v3 +block2: + v4 = bconst.b1 false + return v4 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; ucomiss %xmm1, %xmm0 +; jp label1 +; jnz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; xorl %eax, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret + +function %f4(f32, f32) -> b1 { +block0(v0: f32, v1: f32): + v2 = fcmp ne v0, v1 + brz v2, block1 + jump block2 +block1: + v3 = bconst.b1 true + return v3 +block2: + v4 = bconst.b1 false + return v4 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; ucomiss %xmm1, %xmm0 +; jp label2 +; jnz label2; j label1 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; xorl %eax, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 614e3f56f7..565905cc69 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -320,7 +320,7 @@ block2: ; setz %r11b ; cmpq $0, %rsi ; setz %al -; andb %al, %r11b, %al +; testb %r11b, %al ; jnz label1; j label2 ; block1: ; movl $1, %eax @@ -351,11 +351,11 @@ block2: ; movq %rsp, %rbp ; block0: ; cmpq $0, 
%rdi -; setnz %r11b +; setz %r11b ; cmpq $0, %rsi -; setnz %al -; orb %al, %r11b, %al -; jnz label1; j label2 +; setz %al +; testb %r11b, %al +; jz label1; j label2 ; block1: ; movl $1, %eax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/runtests/fcmp.clif b/cranelift/filetests/filetests/runtests/fcmp.clif new file mode 100644 index 0000000000..eb77f779d0 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/fcmp.clif @@ -0,0 +1,62 @@ +test run +target aarch64 +target s390x +target x86_64 + +function %fcmp_eq(f64, f64) -> b1 { +block0(v0: f64, v1: f64): + v2 = fcmp eq v0, v1 + return v2 +} + +; run: %fcmp_eq(0x1.0, 0x1.0) == true +; run: %fcmp_eq(0x1.0, 0x0.0) == false + +function %fcmp_ne(f64, f64) -> b1 { +block0(v0: f64, v1: f64): + v2 = fcmp ne v0, v1 + return v2 +} + +; run: %fcmp_ne(0x1.0, 0x1.0) == false +; run: %fcmp_ne(0x1.0, 0x0.0) == true + +function %fcmp_lt(f64, f64) -> b1 { +block0(v0: f64, v1: f64): + v2 = fcmp lt v0, v1 + return v2 +} + +; run: %fcmp_lt(0x1.0, 0x1.0) == false +; run: %fcmp_lt(0x1.0, 0x0.0) == false +; run: %fcmp_lt(0x1.0, 0x2.3) == true + +function %fcmp_le(f64, f64) -> b1 { +block0(v0: f64, v1: f64): + v2 = fcmp le v0, v1 + return v2 +} + +; run: %fcmp_le(0x1.0, 0x1.0) == true +; run: %fcmp_le(0x1.0, 0x0.0) == false +; run: %fcmp_le(0x1.0, 0x2.3) == true + +function %fcmp_gt(f64, f64) -> b1 { +block0(v0: f64, v1: f64): + v2 = fcmp gt v0, v1 + return v2 +} + +; run: %fcmp_gt(0x1.0, 0x1.0) == false +; run: %fcmp_gt(0x1.0, 0x0.0) == true +; run: %fcmp_gt(0x1.0, 0x2.3) == false + +function %fcmp_ge(f64, f64) -> b1 { +block0(v0: f64, v1: f64): + v2 = fcmp ge v0, v1 + return v2 +} + +; run: %fcmp_ge(0x1.0, 0x1.0) == true +; run: %fcmp_ge(0x1.0, 0x0.0) == true +; run: %fcmp_ge(0x1.0, 0x2.3) == false