diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 8037df9ba1..a08ea230cd 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -210,6 +210,13 @@ impl RegMemImm { Self::Imm { .. } => {} } } + + pub(crate) fn to_reg(&self) -> Option { + match self { + Self::Reg { reg } => Some(*reg), + _ => None, + } + } } impl ShowWithRRU for RegMemImm { @@ -255,6 +262,12 @@ impl RegMem { RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector), } } + pub(crate) fn to_reg(&self) -> Option { + match self { + RegMem::Reg { reg } => Some(*reg), + _ => None, + } + } } impl ShowWithRRU for RegMem { @@ -271,7 +284,7 @@ impl ShowWithRRU for RegMem { } /// Some basic ALU operations. TODO: maybe add Adc, Sbb. -#[derive(Clone, PartialEq)] +#[derive(Copy, Clone, PartialEq)] pub enum AluRmiROpcode { Add, Sub, diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index da6be3eb4a..b47e0ee7ff 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -2044,8 +2044,7 @@ pub(crate) fn emit( // // ;; positive inputs get saturated to INT_MAX; negative ones to INT_MIN, which is // ;; already in %dst. - // mov 0, %tmp_gpr - // movd/movq %tmp_gpr, %tmp_xmm + // xorpd %tmp_xmm, %tmp_xmm // cmpss/cmpsd %src, %tmp_xmm // jnb done // mov/movaps $INT_MAX, %dst @@ -2069,8 +2068,7 @@ pub(crate) fn emit( // // ;; if positive, it was a real overflow // check_positive: - // mov 0, %tmp_gpr - // movd/movq %tmp_gpr, %tmp_xmm + // xorpd %tmp_xmm, %tmp_xmm // cmpss/cmpsd %src, %tmp_xmm // jnb done // ud2 trap IntegerOverflow @@ -2120,11 +2118,10 @@ pub(crate) fn emit( sink.bind_label(not_nan); // If the input was positive, saturate to INT_MAX. - // TODO use xorps/xorpd here - let inst = Inst::imm_r(false, 0, *tmp_gpr); // rely on sign-extension to get 0 on 64-bits - inst.emit(sink, flags, state); + + // Zero out tmp_xmm. let inst = - Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm); + Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm); inst.emit(sink, flags, state); let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg()); @@ -2194,12 +2191,9 @@ pub(crate) fn emit( sink.bind_label(check_positive); - // TODO use xorpd - let inst = Inst::imm_r(false, 0, *tmp_gpr); - inst.emit(sink, flags, state); - + // Zero out the tmp_xmm register. let inst = - Inst::gpr_to_xmm(cast_op, RegMem::reg(tmp_gpr.to_reg()), *src_size, *tmp_xmm); + Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm); inst.emit(sink, flags, state); let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg()); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index cc7967a64f..3e183989a9 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -939,6 +939,40 @@ impl Inst { } } +// Inst helpers. + +impl Inst { + /// In certain cases, instructions of this format can act as a definition of an XMM register, + /// producing a value that is independent of its initial value. + /// + /// For example, a vector equality comparison (`cmppd` or `cmpps`) that compares a register to + /// itself will generate all ones as a result, regardless of its value. From the register + /// allocator's point of view, we should (i) record the first register, which is normally a + /// mod, as a def instead; and (ii) not record the second register as a use, because it is the + /// same as the first register (already handled). + fn produces_const(&self) -> bool { + match self { + Self::Alu_RMI_R { op, src, dst, .. } => { + src.to_reg() == Some(dst.to_reg()) + && (*op == AluRmiROpcode::Xor || *op == AluRmiROpcode::Sub) + } + + Self::XMM_RM_R { op, src, dst, .. } => { + src.to_reg() == Some(dst.to_reg()) + && (*op == SseOpcode::Xorps || *op == SseOpcode::Xorpd) + } + + Self::XmmRmRImm { op, src, dst, imm } => { + src.to_reg() == Some(dst.to_reg()) + && (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps) + && *imm == FcmpImm::Equal.encode() + } + + _ => false, + } + } +} + //============================================================================= // Instructions: printing @@ -1433,8 +1467,13 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { // sets. match inst { Inst::Alu_RMI_R { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_mod(*dst); + if inst.produces_const() { + // No need to account for src, since src == dst. + collector.add_def(*dst); + } else { + src.get_regs_as_uses(collector); + collector.add_mod(*dst); + } } Inst::Div { divisor, .. } => { collector.add_mod(Writable::from_reg(regs::rax())); @@ -1466,26 +1505,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(*dst); } Inst::XMM_RM_R { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_mod(*dst); - } - Inst::XmmRmRImm { src, dst, op, imm } => { - // In certain cases, instructions of this format can act as a definition of an XMM - // register, producing a value that is independent of its initial value. For example, - // a vector equality comparison (`cmppd` or `cmpps`) that compares a register to itself - // will generate all ones as a result, regardless of its value. From the register - // allocator's point of view, we should (i) record the first register, which is normally - // a mod, as a def instread; and (ii) not record the second register as a use, because - // it is the same as the first register (already handled). TODO Re-factored in #2071. - let is_def = if let RegMem::Reg { reg } = src { - (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps) - && *imm == FcmpImm::Equal.encode() - && *reg == dst.to_reg() + if inst.produces_const() { + // No need to account for src, since src == dst. + collector.add_def(*dst); } else { - false - }; - - if is_def { + src.get_regs_as_uses(collector); + collector.add_mod(*dst); + } + } + Inst::XmmRmRImm { src, dst, .. } => { + if inst.produces_const() { + // No need to account for src, since src == dst. collector.add_def(*dst); } else { src.get_regs_as_uses(collector); @@ -1694,6 +1724,17 @@ impl RegMemImm { RegMemImm::Imm { .. } => {} } } + + fn map_as_def(&mut self, mapper: &RUM) { + match self { + Self::Reg { reg } => { + let mut writable_src = Writable::from_reg(*reg); + map_def(mapper, &mut writable_src); + *self = Self::reg(writable_src.to_reg()); + } + _ => panic!("unexpected RegMemImm kind in map_src_reg_as_def"), + } + } } impl RegMem { @@ -1703,10 +1744,23 @@ impl RegMem { RegMem::Mem { ref mut addr, .. } => addr.map_uses(map), } } + + fn map_as_def(&mut self, mapper: &RUM) { + match self { + Self::Reg { reg } => { + let mut writable_src = Writable::from_reg(*reg); + map_def(mapper, &mut writable_src); + *self = Self::reg(writable_src.to_reg()); + } + _ => panic!("unexpected RegMem kind in map_src_reg_as_def"), + } + } } fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { // Note this must be carefully synchronized with x64_get_regs. + let produces_const = inst.produces_const(); + match inst { // ** Nop Inst::Alu_RMI_R { @@ -1714,8 +1768,13 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { ref mut dst, .. } => { - src.map_uses(mapper); - map_mod(mapper, dst); + if produces_const { + src.map_as_def(mapper); + map_def(mapper, dst); + } else { + src.map_uses(mapper); + map_mod(mapper, dst); + } } Inst::Div { divisor, .. } => divisor.map_uses(mapper), Inst::MulHi { rhs, .. } => rhs.map_uses(mapper), @@ -1742,28 +1801,12 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { Inst::XmmRmRImm { ref mut src, ref mut dst, - ref op, - ref imm, + .. } => { - // In certain cases, instructions of this format can convert an XMM register into a - // define (e.g. an equality comparison); this extra logic is necessary to inform the - // registry allocator of a different register usage. TODO Re-factored in #2071. - if let RegMem::Reg { reg } = src { - if (*op == SseOpcode::Cmppd || *op == SseOpcode::Cmpps) - && *imm == FcmpImm::Equal.encode() - && *reg == dst.to_reg() - { - let mut writable_src = Writable::from_reg(*reg); - map_def(mapper, &mut writable_src); - *reg = writable_src.to_reg(); - map_def(mapper, dst); - } else { - // Otherwise, we map the instruction as usual. - src.map_uses(mapper); - map_mod(mapper, dst); - } + if produces_const { + src.map_as_def(mapper); + map_def(mapper, dst); } else { - // TODO this is duplicated because there seems to be no way to join the `if let` and `if`? src.map_uses(mapper); map_mod(mapper, dst); } @@ -1773,8 +1816,13 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { ref mut dst, .. } => { - src.map_uses(mapper); - map_mod(mapper, dst); + if produces_const { + src.map_as_def(mapper); + map_def(mapper, dst); + } else { + src.map_uses(mapper); + map_mod(mapper, dst); + } } Inst::XmmRmiReg { ref mut src, @@ -2097,8 +2145,23 @@ impl MachInst for Inst { ) -> SmallVec<[Self; 4]> { let mut ret = SmallVec::new(); if ty.is_int() { - let is_64 = ty == I64 && value > 0x7fffffff; - ret.push(Inst::imm_r(is_64, value, to_reg)); + if value == 0 { + ret.push(Inst::alu_rmi_r( + ty == I64, + AluRmiROpcode::Xor, + RegMemImm::reg(to_reg.to_reg()), + to_reg, + )); + } else { + let is_64 = ty == I64 && value > 0x7fffffff; + ret.push(Inst::imm_r(is_64, value, to_reg)); + } + } else if value == 0 { + ret.push(Inst::xmm_rm_r( + SseOpcode::Xorps, + RegMem::reg(to_reg.to_reg()), + to_reg, + )); } else { match ty { F32 => { diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 127b717389..9250c72440 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1043,7 +1043,7 @@ fn lower_insn_to_regs>( } Opcode::F64const => { - // TODO use xorpd for 0 and cmpeqpd for all 1s. + // TODO use cmpeqpd for all 1s. let value = ctx.get_constant(insn).unwrap(); let dst = output_to_reg(ctx, outputs[0]); for inst in Inst::gen_constant(dst, value, F64, |reg_class, ty| { @@ -1054,7 +1054,7 @@ fn lower_insn_to_regs>( } Opcode::F32const => { - // TODO use xorps for 0 and cmpeqps for all 1s. + // TODO use cmpeqps for all 1s. let value = ctx.get_constant(insn).unwrap(); let dst = output_to_reg(ctx, outputs[0]); for inst in Inst::gen_constant(dst, value, F32, |reg_class, ty| {