Remove MachInst::gen_constant (#5427)
* aarch64: constant generation cleanup Add support for MOVZ and MOVN generation via ISLE. Handle f32const, f64const, and nop instructions via ISLE. No longer call Inst::gen_constant from lower.rs. * riscv64: constant generation cleanup Handle f32const, f64const, and nop instructions via ISLE. * s390x: constant generation cleanup Fix rule priorities for "imm" term. Only handle 32-bit stack offsets; no longer use load_constant64. * x64: constant generation cleanup No longer call Inst::gen_constant from lower.rs or abi.rs. * Refactor LowerBackend::lower to return InstOutput No longer write to the per-insn output registers; instead, return an InstOutput vector of temp registers holding the outputs. This will allow calling LowerBackend::lower multiple times for the same instruction, e.g. to rematerialize constants. When emitting the primary copy of the instruction during lowering, writing to the per-insn registers is now done in lower_clif_block. As a result, the ISLE lower_common routine is no longer needed. In addition, the InsnOutput type and all code related to it can be removed as well. * Refactor IsleContext to hold a LowerBackend reference Remove the "triple", "flags", and "isa_flags" fields that are copied from LowerBackend to each IsleContext, and instead just hold a reference to LowerBackend in IsleContext. This will allow calling LowerBackend::lower from within callbacks in src/machinst/isle.rs, e.g. to rematerialize constants. To avoid having to pass LowerBackend references through multiple functions, eliminate the lower_insn_to_regs subroutines in those targets that still have them, and just inline into the main lower routine. This also eliminates lower_inst.rs on aarch64 and riscv64. Replace all accesses to the removed IsleContext fields by going through the LowerBackend reference. 
* Remove MachInst::gen_constant This addresses the problem described in issue https://github.com/bytecodealliance/wasmtime/issues/4426 that targets currently have to duplicate code to emit constants between the ISLE logic and the gen_constant callback. After the various cleanups in earlier patches in this series, the only remaining user of gen_constant is put_value_in_regs in Lower. This can now be removed, and instead constant rematerialization can be performed in the put_in_regs ISLE callback by simply directly calling LowerBackend::lower on the instruction defining the constant (using a different output register). Since the check for egraph mode is now no longer performed in put_value_in_regs, the Lower::flags member becomes obsolete. Care needs to be taken that other calls directly to the Lower::put_value_in_regs routine now handle the fact that no more rematerialization is performed. All such calls in target code already historically handle constants themselves. The remaining call site in the ISLE gen_call_common helper can be redirected to the ISLE put_in_regs callback. The existing target implementations of gen_constant are then unused and can be removed. (In some targets there may still be further opportunities to remove duplication between ISLE and some local Rust code - this can be left to future patches.)
This commit is contained in:
@@ -23,15 +23,7 @@ pub fn compile<B: LowerBackend + TargetIsa>(
|
||||
let block_order = BlockLoweringOrder::new(f);
|
||||
|
||||
// Build the lowering context.
|
||||
let lower = crate::machinst::Lower::new(
|
||||
f,
|
||||
b.flags().clone(),
|
||||
machine_env,
|
||||
abi,
|
||||
emit_info,
|
||||
block_order,
|
||||
sigs,
|
||||
)?;
|
||||
let lower = crate::machinst::Lower::new(f, machine_env, abi, emit_info, block_order, sigs)?;
|
||||
|
||||
// Lower the IR.
|
||||
let vcode = {
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
//! Miscellaneous helpers for machine backends.
|
||||
|
||||
use super::{InsnOutput, Lower, VCodeInst, ValueRegs};
|
||||
use super::{Reg, Writable};
|
||||
use crate::ir::Type;
|
||||
use std::ops::{Add, BitAnd, Not, Sub};
|
||||
|
||||
@@ -20,14 +18,6 @@ pub(crate) fn ty_has_float_or_vec_representation(ty: Type) -> bool {
|
||||
ty.is_vector() || ty.is_float()
|
||||
}
|
||||
|
||||
/// Allocate a register for an instruction output and return it.
|
||||
pub(crate) fn get_output_reg<I: VCodeInst>(
|
||||
ctx: &mut Lower<I>,
|
||||
spec: InsnOutput,
|
||||
) -> ValueRegs<Writable<Reg>> {
|
||||
ctx.get_output(spec.insn, spec.output)
|
||||
}
|
||||
|
||||
/// Align a size up to a power-of-two alignment.
|
||||
pub(crate) fn align_to<N>(x: N, alignment: N) -> N
|
||||
where
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
//! A place to park MachInst::Inst fragments which are common across multiple architectures.
|
||||
|
||||
use super::{Lower, VCodeInst};
|
||||
use crate::ir::{self, Inst as IRInst};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
//============================================================================
|
||||
// Instruction input "slots".
|
||||
@@ -24,15 +22,6 @@ pub(crate) struct InsnOutput {
|
||||
pub(crate) output: usize,
|
||||
}
|
||||
|
||||
pub(crate) fn insn_outputs<I: VCodeInst>(
|
||||
ctx: &Lower<I>,
|
||||
insn: IRInst,
|
||||
) -> SmallVec<[InsnOutput; 4]> {
|
||||
(0..ctx.num_outputs(insn))
|
||||
.map(|i| InsnOutput { insn, output: i })
|
||||
.collect()
|
||||
}
|
||||
|
||||
//============================================================================
|
||||
// Atomic instructions.
|
||||
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
use crate::ir::{Inst, Value, ValueList};
|
||||
use crate::machinst::{get_output_reg, InsnOutput};
|
||||
use crate::ir::{Value, ValueList};
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use smallvec::SmallVec;
|
||||
use std::cell::Cell;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
pub use super::MachLabel;
|
||||
use super::RetPair;
|
||||
@@ -13,9 +11,10 @@ pub use crate::ir::{
|
||||
DynamicStackSlot, ExternalName, FuncRef, GlobalValue, Immediate, SigRef, StackSlot,
|
||||
};
|
||||
pub use crate::isa::unwind::UnwindInst;
|
||||
pub use crate::isa::TargetIsa;
|
||||
pub use crate::machinst::{
|
||||
ABIArg, ABIArgSlot, InputSourceInst, Lower, RealReg, Reg, RelocDistance, Sig, VCodeInst,
|
||||
Writable,
|
||||
ABIArg, ABIArgSlot, InputSourceInst, Lower, LowerBackend, RealReg, Reg, RelocDistance, Sig,
|
||||
VCodeInst, Writable,
|
||||
};
|
||||
pub use crate::settings::TlsModel;
|
||||
|
||||
@@ -123,11 +122,32 @@ macro_rules! isle_lower_prelude_methods {
|
||||
|
||||
#[inline]
|
||||
fn put_in_reg(&mut self, val: Value) -> Reg {
|
||||
self.lower_ctx.put_value_in_regs(val).only_reg().unwrap()
|
||||
self.put_in_regs(val).only_reg().unwrap()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn put_in_regs(&mut self, val: Value) -> ValueRegs {
|
||||
// If the value is a constant, then (re)materialize it at each
|
||||
// use. This lowers register pressure. (Only do this if we are
|
||||
// not using egraph-based compilation; the egraph framework
|
||||
// more efficiently rematerializes constants where needed.)
|
||||
if !self.backend.flags().use_egraphs() {
|
||||
let inputs = self.lower_ctx.get_value_as_source_or_const(val);
|
||||
if inputs.constant.is_some() {
|
||||
let insn = match inputs.inst {
|
||||
InputSourceInst::UniqueUse(insn, 0) => Some(insn),
|
||||
InputSourceInst::Use(insn, 0) => Some(insn),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(insn) = insn {
|
||||
if let Ok(regs) = self.backend.lower(self.lower_ctx, insn) {
|
||||
assert!(regs.len() == 1);
|
||||
return regs[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.lower_ctx.put_value_in_regs(val)
|
||||
}
|
||||
|
||||
@@ -263,7 +283,7 @@ macro_rules! isle_lower_prelude_methods {
|
||||
}
|
||||
|
||||
fn avoid_div_traps(&mut self, _: Type) -> Option<()> {
|
||||
if self.flags.avoid_div_traps() {
|
||||
if self.backend.flags().avoid_div_traps() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
@@ -272,12 +292,12 @@ macro_rules! isle_lower_prelude_methods {
|
||||
|
||||
#[inline]
|
||||
fn tls_model(&mut self, _: Type) -> TlsModel {
|
||||
self.flags.tls_model()
|
||||
self.backend.flags().tls_model()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn tls_model_is_elf_gd(&mut self) -> Option<()> {
|
||||
if self.flags.tls_model() == TlsModel::ElfGd {
|
||||
if self.backend.flags().tls_model() == TlsModel::ElfGd {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
@@ -286,7 +306,7 @@ macro_rules! isle_lower_prelude_methods {
|
||||
|
||||
#[inline]
|
||||
fn tls_model_is_macho(&mut self) -> Option<()> {
|
||||
if self.flags.tls_model() == TlsModel::Macho {
|
||||
if self.backend.flags().tls_model() == TlsModel::Macho {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
@@ -295,7 +315,7 @@ macro_rules! isle_lower_prelude_methods {
|
||||
|
||||
#[inline]
|
||||
fn tls_model_is_coff(&mut self) -> Option<()> {
|
||||
if self.flags.tls_model() == TlsModel::Coff {
|
||||
if self.backend.flags().tls_model() == TlsModel::Coff {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
@@ -304,7 +324,7 @@ macro_rules! isle_lower_prelude_methods {
|
||||
|
||||
#[inline]
|
||||
fn preserve_frame_pointers(&mut self) -> Option<()> {
|
||||
if self.flags.preserve_frame_pointers() {
|
||||
if self.backend.flags().preserve_frame_pointers() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
@@ -572,7 +592,7 @@ macro_rules! isle_prelude_caller_methods {
|
||||
&extname,
|
||||
dist,
|
||||
caller_conv,
|
||||
self.flags.clone(),
|
||||
self.backend.flags().clone(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -601,7 +621,7 @@ macro_rules! isle_prelude_caller_methods {
|
||||
ptr,
|
||||
Opcode::CallIndirect,
|
||||
caller_conv,
|
||||
self.flags.clone(),
|
||||
self.backend.flags().clone(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -641,7 +661,7 @@ macro_rules! isle_prelude_method_helpers {
|
||||
let input = inputs
|
||||
.get(off + i, &self.lower_ctx.dfg().value_lists)
|
||||
.unwrap();
|
||||
arg_regs.push(self.lower_ctx.put_value_in_regs(input));
|
||||
arg_regs.push(self.put_in_regs(input));
|
||||
}
|
||||
for (i, arg_regs) in arg_regs.iter().enumerate() {
|
||||
caller.emit_copy_regs_to_buffer(self.lower_ctx, i, *arg_regs);
|
||||
@@ -708,77 +728,11 @@ macro_rules! isle_prelude_method_helpers {
|
||||
|
||||
/// This structure is used to implement the ISLE-generated `Context` trait and
|
||||
/// internally has a temporary reference to a machinst `LowerCtx`.
|
||||
pub(crate) struct IsleContext<'a, 'b, I, Flags, IsaFlags, const N: usize>
|
||||
pub(crate) struct IsleContext<'a, 'b, I, B>
|
||||
where
|
||||
I: VCodeInst,
|
||||
[(I, bool); N]: smallvec::Array,
|
||||
B: LowerBackend,
|
||||
{
|
||||
pub lower_ctx: &'a mut Lower<'b, I>,
|
||||
pub triple: &'a Triple,
|
||||
pub flags: &'a Flags,
|
||||
pub isa_flags: &'a IsaFlags,
|
||||
}
|
||||
|
||||
/// Shared lowering code amongst all backends for doing ISLE-based lowering.
|
||||
///
|
||||
/// The `isle_lower` argument here is an ISLE-generated function for `lower` and
|
||||
/// then this function otherwise handles register mapping and such around the
|
||||
/// lowering.
|
||||
pub(crate) fn lower_common<I, Flags, IsaFlags, IsleFunction, const N: usize>(
|
||||
lower_ctx: &mut Lower<I>,
|
||||
triple: &Triple,
|
||||
flags: &Flags,
|
||||
isa_flags: &IsaFlags,
|
||||
outputs: &[InsnOutput],
|
||||
inst: Inst,
|
||||
isle_lower: IsleFunction,
|
||||
) -> Result<(), ()>
|
||||
where
|
||||
I: VCodeInst,
|
||||
[(I, bool); N]: smallvec::Array<Item = (I, bool)>,
|
||||
IsleFunction: Fn(&mut IsleContext<'_, '_, I, Flags, IsaFlags, N>, Inst) -> Option<InstOutput>,
|
||||
{
|
||||
// TODO: reuse the ISLE context across lowerings so we can reuse its
|
||||
// internal heap allocations.
|
||||
let mut isle_ctx = IsleContext {
|
||||
lower_ctx,
|
||||
triple,
|
||||
flags,
|
||||
isa_flags,
|
||||
};
|
||||
|
||||
let temp_regs = isle_lower(&mut isle_ctx, inst).ok_or(())?;
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
debug_assert_eq!(
|
||||
temp_regs.len(),
|
||||
outputs.len(),
|
||||
"the number of temporary values and destination values do \
|
||||
not match ({} != {}); ensure the correct registers are being \
|
||||
returned.",
|
||||
temp_regs.len(),
|
||||
outputs.len(),
|
||||
);
|
||||
}
|
||||
|
||||
// The ISLE generated code emits its own registers to define the
|
||||
// instruction's lowered values in. However, other instructions
|
||||
// that use this SSA value will be lowered assuming that the value
|
||||
// is generated into a pre-assigned, different, register.
|
||||
//
|
||||
// To connect the two, we set up "aliases" in the VCodeBuilder
|
||||
// that apply when it is building the Operand table for the
|
||||
// regalloc to use. These aliases effectively rewrite any use of
|
||||
// the pre-assigned register to the register that was returned by
|
||||
// the ISLE lowering logic.
|
||||
for i in 0..outputs.len() {
|
||||
let regs = temp_regs[i];
|
||||
let dsts = get_output_reg(isle_ctx.lower_ctx, outputs[i]);
|
||||
for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) {
|
||||
isle_ctx.lower_ctx.set_vreg_alias(dst.to_reg(), *temp);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
pub backend: &'a B,
|
||||
}
|
||||
|
||||
@@ -14,9 +14,9 @@ use crate::ir::{
|
||||
Type, Value, ValueDef, ValueLabelAssignments, ValueLabelStart,
|
||||
};
|
||||
use crate::machinst::{
|
||||
non_writable_value_regs, writable_value_regs, BlockIndex, BlockLoweringOrder, Callee,
|
||||
LoweredBlock, MachLabel, Reg, SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData,
|
||||
VCodeConstants, VCodeInst, ValueRegs, Writable,
|
||||
writable_value_regs, BlockIndex, BlockLoweringOrder, Callee, LoweredBlock, MachLabel, Reg,
|
||||
SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants, VCodeInst,
|
||||
ValueRegs, Writable,
|
||||
};
|
||||
use crate::{trace, CodegenResult};
|
||||
use alloc::vec::Vec;
|
||||
@@ -26,6 +26,9 @@ use std::fmt::Debug;
|
||||
|
||||
use super::{VCodeBuildDirection, VRegAllocator};
|
||||
|
||||
/// A vector of ValueRegs, used to represent the outputs of an instruction.
|
||||
pub type InstOutput = SmallVec<[ValueRegs<Reg>; 2]>;
|
||||
|
||||
/// An "instruction color" partitions CLIF instructions by side-effecting ops.
|
||||
/// All instructions with the same "color" are guaranteed not to be separated by
|
||||
/// any side-effecting op (for this purpose, loads are also considered
|
||||
@@ -121,7 +124,7 @@ pub trait LowerBackend {
|
||||
/// edge (block-param actuals) into registers, because the actual branch
|
||||
/// generation (`lower_branch_group()`) happens *after* any possible merged
|
||||
/// out-edge.
|
||||
fn lower(&self, ctx: &mut Lower<Self::MInst>, inst: Inst) -> CodegenResult<()>;
|
||||
fn lower(&self, ctx: &mut Lower<Self::MInst>, inst: Inst) -> CodegenResult<InstOutput>;
|
||||
|
||||
/// Lower a block-terminating group of branches (which together can be seen
|
||||
/// as one N-way branch), given a vcode MachLabel for each target.
|
||||
@@ -146,9 +149,6 @@ pub struct Lower<'func, I: VCodeInst> {
|
||||
/// The function to lower.
|
||||
f: &'func Function,
|
||||
|
||||
/// Machine-independent flags.
|
||||
flags: crate::settings::Flags,
|
||||
|
||||
/// The set of allocatable registers.
|
||||
allocatable: PRegSet,
|
||||
|
||||
@@ -324,7 +324,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
/// Prepare a new lowering context for the given IR function.
|
||||
pub fn new(
|
||||
f: &'func Function,
|
||||
flags: crate::settings::Flags,
|
||||
machine_env: &MachineEnv,
|
||||
abi: Callee<I::ABIMachineSpec>,
|
||||
emit_info: I::Info,
|
||||
@@ -415,7 +414,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
|
||||
Ok(Lower {
|
||||
f,
|
||||
flags,
|
||||
allocatable: PRegSet::from(machine_env),
|
||||
vcode,
|
||||
vregs,
|
||||
@@ -742,7 +740,27 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
// or any of its outputs its used.
|
||||
if has_side_effect || value_needed {
|
||||
trace!("lowering: inst {}: {:?}", inst, self.f.dfg[inst]);
|
||||
backend.lower(self, inst)?;
|
||||
let temp_regs = backend.lower(self, inst)?;
|
||||
|
||||
// The ISLE generated code emits its own registers to define the
|
||||
// instruction's lowered values in. However, other instructions
|
||||
// that use this SSA value will be lowered assuming that the value
|
||||
// is generated into a pre-assigned, different, register.
|
||||
//
|
||||
// To connect the two, we set up "aliases" in the VCodeBuilder
|
||||
// that apply when it is building the Operand table for the
|
||||
// regalloc to use. These aliases effectively rewrite any use of
|
||||
// the pre-assigned register to the register that was returned by
|
||||
// the ISLE lowering logic.
|
||||
debug_assert_eq!(temp_regs.len(), self.num_outputs(inst));
|
||||
for i in 0..self.num_outputs(inst) {
|
||||
let regs = temp_regs[i];
|
||||
let dsts = self.value_regs[self.f.dfg.inst_results(inst)[i]];
|
||||
debug_assert_eq!(regs.len(), dsts.len());
|
||||
for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) {
|
||||
self.set_vreg_alias(*dst, *temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let loc = self.srcloc(inst);
|
||||
@@ -1249,33 +1267,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
assert!(!self.inst_sunk.contains(&inst));
|
||||
}
|
||||
|
||||
// If the value is a constant, then (re)materialize it at each
|
||||
// use. This lowers register pressure. (Only do this if we are
|
||||
// not using egraph-based compilation; the egraph framework
|
||||
// more efficiently rematerializes constants where needed.)
|
||||
if !self.flags.use_egraphs() {
|
||||
if let Some(c) = self
|
||||
.f
|
||||
.dfg
|
||||
.value_def(val)
|
||||
.inst()
|
||||
.and_then(|inst| self.get_constant(inst))
|
||||
{
|
||||
let ty = self.f.dfg.value_type(val);
|
||||
let regs = self.alloc_tmp(ty);
|
||||
trace!(" -> regs {:?}", regs);
|
||||
assert!(regs.is_valid());
|
||||
|
||||
let insts = I::gen_constant(regs, c.into(), ty, |ty| {
|
||||
self.alloc_tmp(ty).only_reg().unwrap()
|
||||
});
|
||||
for inst in insts {
|
||||
self.emit(inst);
|
||||
}
|
||||
return non_writable_value_regs(regs);
|
||||
}
|
||||
}
|
||||
|
||||
let regs = self.value_regs[val];
|
||||
trace!(" -> regs {:?}", regs);
|
||||
assert!(regs.is_valid());
|
||||
@@ -1284,19 +1275,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
||||
|
||||
regs
|
||||
}
|
||||
|
||||
/// Get the `idx`th output register(s) of the given IR instruction.
|
||||
///
|
||||
/// When `backend.lower_inst_to_regs(ctx, inst)` is called, it is expected
|
||||
/// that the backend will write results to these output register(s). This
|
||||
/// register will always be "fresh"; it is guaranteed not to overlap with
|
||||
/// any of the inputs, and can be freely used as a scratch register within
|
||||
/// the lowered instruction sequence, as long as its final value is the
|
||||
/// result of the computation.
|
||||
pub fn get_output(&self, ir_inst: Inst, idx: usize) -> ValueRegs<Writable<Reg>> {
|
||||
let val = self.f.dfg.inst_results(ir_inst)[idx];
|
||||
writable_value_regs(self.value_regs[val])
|
||||
}
|
||||
}
|
||||
|
||||
/// Codegen primitives: allocate temps, emit instructions, set result registers,
|
||||
|
||||
@@ -112,14 +112,6 @@ pub trait MachInst: Clone + Debug {
|
||||
/// Generate a move.
|
||||
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
|
||||
|
||||
/// Generate a constant into a reg.
|
||||
fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
|
||||
to_regs: ValueRegs<Writable<Reg>>,
|
||||
value: u128,
|
||||
ty: Type,
|
||||
alloc_tmp: F,
|
||||
) -> SmallVec<[Self; 4]>;
|
||||
|
||||
/// Generate a dummy instruction that will keep a value alive but
|
||||
/// has no other purpose.
|
||||
fn gen_dummy_use(reg: Reg) -> Self;
|
||||
|
||||
Reference in New Issue
Block a user