diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index 036c324951..0b94ee37fe 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1515,6 +1515,34 @@
             (_ Unit (emit (MInst.VecRRLong op dst src high_half))))
         (writable_reg_to_reg dst)))
 
+;; Helper for emitting `MInst.MovFromVec` instructions.
+(decl mov_from_vec (Reg u8 VectorSize) Reg)
+(rule (mov_from_vec rn idx size)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.MovFromVec dst rn idx size))))
+        (writable_reg_to_reg dst)))
+
+;; Helper for emitting `MInst.MovFromVecSigned` instructions.
+(decl mov_from_vec_signed (Reg u8 VectorSize OperandSize) Reg)
+(rule (mov_from_vec_signed rn idx size scalar_size)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.MovFromVecSigned dst rn idx size scalar_size))))
+        (writable_reg_to_reg dst)))
+
+;; Helper for emitting `MInst.Extend` instructions.
+(decl extend (Reg bool u8 u8) Reg)
+(rule (extend rn signed from_bits to_bits)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.Extend dst rn signed from_bits to_bits))))
+        (writable_reg_to_reg dst)))
+
+;; Helper for emitting `MInst.LoadAcquire` instructions.
+(decl load_acquire (Type Reg) Reg)
+(rule (load_acquire ty addr)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.LoadAcquire ty dst addr))))
+        (writable_reg_to_reg dst)))
+
 ;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (decl imm (Type u64) Reg)
@@ -1543,10 +1571,7 @@
 ;; Place a `Value` into a register, sign extending it to 64-bits
 (decl put_in_reg_sext64 (Value) Reg)
 (rule (put_in_reg_sext64 val @ (value_type (fits_in_32 ty)))
-      (let ((dst WritableReg (temp_writable_reg $I32))
-            (src Reg (put_in_reg val))
-            (_ Unit (emit (MInst.Extend dst src $true (ty_bits ty) 64))))
-        (writable_reg_to_reg dst)))
+      (extend (put_in_reg val) $true (ty_bits ty) 64))
 
 ;; 64-bit passthrough.
 (rule (put_in_reg_sext64 val @ (value_type $I64)) (put_in_reg val))
@@ -1554,10 +1579,7 @@
 ;; Place a `Value` into a register, zero extending it to 64-bits
 (decl put_in_reg_zext64 (Value) Reg)
 (rule (put_in_reg_zext64 val @ (value_type (fits_in_32 ty)))
-      (let ((dst WritableReg (temp_writable_reg $I32))
-            (src Reg (put_in_reg val))
-            (_ Unit (emit (MInst.Extend dst src $false (ty_bits ty) 64))))
-        (writable_reg_to_reg dst)))
+      (extend (put_in_reg val) $false (ty_bits ty) 64))
 
 ;; 64-bit passthrough.
 (rule (put_in_reg_zext64 val @ (value_type $I64)) (put_in_reg val))
@@ -1599,3 +1621,18 @@
 (rule (adds_op (fits_in_32 _ty)) (ALUOp.AddS32))
 (rule (adds_op $I64) (ALUOp.AddS64))
 
+;; An atomic load that can be sunk into another operation.
+(type SinkableAtomicLoad extern (enum))
+
+;; Extract a `SinkableAtomicLoad` that works with `Reg` from a value
+;; operand.
+(decl sinkable_atomic_load (SinkableAtomicLoad) Value)
+(extern extractor sinkable_atomic_load sinkable_atomic_load)
+
+;; Sink a `SinkableAtomicLoad` into a `Reg`.
+;;
+;; This is a side-effectful operation that notifies the context that the
+;; instruction that produced the `SinkableAtomicLoad` has been sunk into another
+;; instruction, and no longer needs to be lowered.
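+;;
+;; Note the division of labor here: `sinkable_atomic_load` above is an
+;; extractor used on the left-hand side of rules, and matching it has no side
+;; effects of its own (rules may still fail to apply after it matches). A rule
+;; that elides the load's own lowering must therefore also invoke this
+;; `sink_atomic_load` constructor on its right-hand side.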
+(decl sink_atomic_load (SinkableAtomicLoad) Reg)
+(extern constructor sink_atomic_load sink_atomic_load)
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index b4f26daeb8..ef9726d483 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -502,3 +502,89 @@
         (result Reg (alu_rrrr (ALUOp3.MSub64) div y64 x64))
        )
     (value_reg result)))
+
+;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; General rule for extending input to an output which fits in a single
+;; register.
+(rule (lower (has_type (fits_in_64 out) (uextend x @ (value_type in))))
+      (value_reg (extend (put_in_reg x) $false (ty_bits in) (ty_bits out))))
+
+;; Extraction of a vector lane automatically extends as necessary, so we can
+;; skip an explicit extending instruction.
+(rule (lower (has_type (fits_in_64 out)
+                       (uextend (def_inst (extractlane vec @ (value_type in)
+                                                       (u8_from_uimm8 lane))))))
+      (value_reg (mov_from_vec (put_in_reg vec) lane (vector_size in))))
+
+;; Atomic loads will also automatically zero their upper bits, so the `uextend`
+;; instruction can effectively be skipped here.
+(rule (lower (has_type (fits_in_64 out)
+                       (uextend (and (value_type in) (sinkable_atomic_load addr)))))
+      (value_reg (load_acquire in (sink_atomic_load addr))))
+
+;; Conversion to 128-bit needs a zero-extension of the lower bits, and the
+;; upper bits are all zero.
+(rule (lower (has_type $I128 (uextend x)))
+      (value_regs (put_in_reg_zext64 x) (imm $I64 0)))
+
+;; Like above, where vector extraction automatically zero-extends, extending to
+;; i128 only requires generating a 0 constant for the upper bits.
+(rule (lower (has_type $I128
+                       (uextend (def_inst (extractlane vec @ (value_type in)
+                                                       (u8_from_uimm8 lane))))))
+      (value_regs (mov_from_vec (put_in_reg vec) lane (vector_size in)) (imm $I64 0)))
+
+;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; General rule for extending input to an output which fits in a single
+;; register.
+(rule (lower (has_type (fits_in_64 out) (sextend x @ (value_type in))))
+      (value_reg (extend (put_in_reg x) $true (ty_bits in) (ty_bits out))))
+
+;; Extraction of a vector lane automatically extends as necessary, so we can
+;; skip an explicit extending instruction.
+(rule (lower (has_type (fits_in_64 out)
+                       (sextend (def_inst (extractlane vec @ (value_type in)
+                                                       (u8_from_uimm8 lane))))))
+      (value_reg (mov_from_vec_signed (put_in_reg vec)
+                                      lane
+                                      (vector_size in)
+                                      (size_from_ty out))))
+
+;; 64-bit to 128-bit only needs to sign-extend the input to the upper bits.
+(rule (lower (has_type $I128 (sextend x)))
+      (let (
+            (lo Reg (put_in_reg_sext64 x))
+            (hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
+           )
+        (value_regs lo hi)))
+
+;; Like above, where vector extraction automatically sign-extends, extending to
+;; i128 only additionally requires replicating the sign bit into the upper
+;; half.
+;;
+;; Note that `mov_from_vec_signed` doesn't exist for i64x2, so that's
+;; specifically excluded here.
+(rule (lower (has_type $I128
+                       (sextend (def_inst (extractlane vec @ (value_type in @ (not_i64x2))
+                                                       (u8_from_uimm8 lane))))))
+      (let (
+            (lo Reg (mov_from_vec_signed (put_in_reg vec)
+                                         lane
+                                         (vector_size in)
+                                         (size_from_ty $I64)))
+            (hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
+           )
+        (value_regs lo hi)))
+
+;; Extension from an extraction of i64x2 into i128.
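+;;
+;; `smov` has no form that moves a 64-bit lane into a 64-bit register (there
+;; are no further bits to sign-extend into), so the lane is extracted with a
+;; plain `mov` (`umov`) and the upper half is then materialized with an
+;; arithmetic shift right by 63, as in the rule above.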
+(rule (lower (has_type $I128
+                       (sextend (def_inst (extractlane vec @ (value_type $I64X2)
+                                                       (u8_from_uimm8 lane))))))
+      (let (
+            (lo Reg (mov_from_vec (put_in_reg vec)
+                                  lane
+                                  (VectorSize.Size64x2)))
+            (hi Reg (alu_rr_imm_shift (ALUOp.Asr64) lo (imm_shift_from_u8 63)))
+           )
+        (value_regs lo hi)))
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index ac185daa2d..f3ca968479 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -1629,7 +1629,7 @@ pub(crate) fn emit_atomic_load<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     rt: Writable<Reg>,
     insn: IRInst,
-) {
+) -> Inst {
     assert!(ctx.data(insn).opcode() == Opcode::AtomicLoad);
     let inputs = insn_inputs(ctx, insn);
     let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -1638,7 +1638,7 @@ pub(crate) fn emit_atomic_load<C: LowerCtx<I = Inst>>(
     // We're ignoring the result type of the load because the LoadAcquire will
     // explicitly zero extend to the nearest word, and also zero the high half
     // of an X register.
-    ctx.emit(Inst::LoadAcquire { access_ty, rt, rn });
+    Inst::LoadAcquire { access_ty, rt, rn }
 }
 
 fn load_op_to_ty(op: Opcode) -> Option<Type> {
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index b841c27807..c17e349505 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -10,7 +10,7 @@ use super::{
     Inst as MInst, JTSequenceInfo, MachLabel, MoveWideConst, NarrowValueMode, Opcode, OperandSize,
     PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VectorSize, NZCV,
 };
-use crate::isa::aarch64::settings as aarch64_settings;
+use crate::isa::aarch64::settings::Flags;
 use crate::machinst::isle::*;
 use crate::{
     binemit::CodeOffset,
@@ -21,9 +21,8 @@ use crate::{
     isa::aarch64::inst::aarch64_map_regs,
     isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
     isa::unwind::UnwindInst,
-    machinst::{get_output_reg, ty_bits, InsnOutput, LowerCtx, RegRenamer},
+    machinst::{ty_bits, InsnOutput, LowerCtx},
 };
-use smallvec::SmallVec;
 use std::boxed::Box;
 use std::vec::Vec;
 
@@ -36,62 +35,21 @@ type BoxExternalName = Box<ExternalName>;
 
 /// The main entry point for lowering with ISLE.
 pub(crate) fn lower<C>(
     lower_ctx: &mut C,
-    isa_flags: &aarch64_settings::Flags,
+    isa_flags: &Flags,
     outputs: &[InsnOutput],
     inst: Inst,
 ) -> Result<(), ()>
 where
     C: LowerCtx<I = MInst>,
 {
-    // TODO: reuse the ISLE context across lowerings so we can reuse its
-    // internal heap allocations.
-    let mut isle_ctx = IsleContext::new(lower_ctx, isa_flags);
-
-    let temp_regs = generated_code::constructor_lower(&mut isle_ctx, inst).ok_or(())?;
-    let mut temp_regs = temp_regs.regs().iter();
-
-    #[cfg(debug_assertions)]
-    {
-        let all_dsts_len = outputs
-            .iter()
-            .map(|out| get_output_reg(isle_ctx.lower_ctx, *out).len())
-            .sum();
-        debug_assert_eq!(
-            temp_regs.len(),
-            all_dsts_len,
-            "the number of temporary registers and destination registers do \
-             not match ({} != {}); ensure the correct registers are being \
-             returned.",
-            temp_regs.len(),
-            all_dsts_len,
-        );
-    }
-
-    // The ISLE generated code emits its own registers to define the
-    // instruction's lowered values in. We rename those registers to the
-    // registers they were assigned when their value was used as an operand in
-    // earlier lowerings.
-    let mut renamer = RegRenamer::default();
-    for output in outputs {
-        let dsts = get_output_reg(isle_ctx.lower_ctx, *output);
-        for (temp, dst) in temp_regs.by_ref().zip(dsts.regs()) {
-            renamer.add_rename(*temp, dst.to_reg());
-        }
-    }
-
-    for mut inst in isle_ctx.into_emitted_insts() {
-        aarch64_map_regs(&mut inst, &renamer);
-        lower_ctx.emit(inst);
-    }
-
-    Ok(())
-}
-
-pub struct IsleContext<'a, C> {
-    lower_ctx: &'a mut C,
-    #[allow(dead_code)] // dead for now, but probably not for long
-    isa_flags: &'a aarch64_settings::Flags,
-    emitted_insts: SmallVec<[MInst; 6]>,
+    lower_common(
+        lower_ctx,
+        isa_flags,
+        outputs,
+        inst,
+        |cx, insn| generated_code::constructor_lower(cx, insn),
+        aarch64_map_regs,
+    )
 }
 
 pub struct ExtendedValue {
@@ -99,21 +57,12 @@ pub struct ExtendedValue {
     extend: ExtendOp,
 }
 
-impl<'a, C> IsleContext<'a, C> {
-    pub fn new(lower_ctx: &'a mut C, isa_flags: &'a aarch64_settings::Flags) -> Self {
-        IsleContext {
-            lower_ctx,
-            isa_flags,
-            emitted_insts: SmallVec::new(),
-        }
-    }
-
-    pub fn into_emitted_insts(self) -> SmallVec<[MInst; 6]> {
-        self.emitted_insts
-    }
+pub struct SinkableAtomicLoad {
+    atomic_load: Inst,
+    atomic_addr: Value,
 }
 
-impl<'a, C> generated_code::Context for IsleContext<'a, C>
+impl<C> generated_code::Context for IsleContext<'_, C, Flags, 6>
 where
     C: LowerCtx<I = MInst>,
 {
@@ -275,4 +224,23 @@ where
             n => Some(n as u64),
         }
     }
+
+    fn sinkable_atomic_load(&mut self, val: Value) -> Option<SinkableAtomicLoad> {
+        let input = self.lower_ctx.get_value_as_source_or_const(val);
+        if let Some((atomic_load, 0)) = input.inst {
+            if self.lower_ctx.data(atomic_load).opcode() == Opcode::AtomicLoad {
+                let atomic_addr = self.lower_ctx.input_as_value(atomic_load, 0);
+                return Some(SinkableAtomicLoad {
+                    atomic_load,
+                    atomic_addr,
+                });
+            }
+        }
+        None
+    }
+
+    fn sink_atomic_load(&mut self, load: &SinkableAtomicLoad) -> Reg {
+        self.lower_ctx.sink_inst(load.atomic_load);
+        self.put_in_reg(load.atomic_addr)
+    }
 }
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
index 7ce3fa9828..af66f7179f 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
 src/prelude.isle d3d2a6a42fb778231a4cdca30995324e1293a9ca8073c5a27a501535759eb51f84a6718322a93dfba4b66ee4f0c9afce7dcec0428516ef0c5bc96e8c8b76925d
-src/isa/aarch64/inst.isle cec03d88680e8da01424eecc05ef73a48e4055d29fe841fceaa3e6ea4e7cb9abb887401bb5acb2e058c9fc993188640990b699e88272d62e243781b231cdfb0d
-src/isa/aarch64/lower.isle e1ae53adc953ad395feeecd8edc8bcfd288491a4e4a71510e5f06e221f767518c6e060ff0d795c7c2510b7d898cc8b9bc0313906412e0176605c33427926f828
+src/isa/aarch64/inst.isle 70d7b319ba0b28173d2ef1820bd0e9c4b8cf7a5ab34475a43f03bdc5a6b945a7faf40d7b539a12050ddd8ebc4c6b0fe82df5940eaf966420bb4d58e7420d4206
+src/isa/aarch64/lower.isle dfc622b2fecea98079fff182ce3443ada5448256662f598ea009caed3d9bcf6b4816f736a8c7f70142467febf8fc97230c57287f06e80e6101f3b401208c599c
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
index f3ad32cd48..b5efba649d 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
@@ -73,6 +73,8 @@ pub trait Context {
     fn zero_reg(&mut self) -> Reg;
     fn writable_zero_reg(&mut self) -> WritableReg;
     fn load_constant64_full(&mut self, arg0: u64) -> Reg;
+    fn sinkable_atomic_load(&mut self, arg0: Value) -> Option<SinkableAtomicLoad>;
+    fn sink_atomic_load(&mut self, arg0: &SinkableAtomicLoad) -> Reg;
     fn safe_divisor_from_imm64(&mut self, arg0: Imm64) -> Option<u64>;
 }
@@ -1598,31 +1600,126 @@ pub fn constructor_vec_rr_long<C: Context>(
     return Some(expr4_0);
 }
 
+// Generated as internal constructor for term mov_from_vec.
+pub fn constructor_mov_from_vec<C: Context>(
+    ctx: &mut C,
+    arg0: Reg,
+    arg1: u8,
+    arg2: &VectorSize,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    // Rule at src/isa/aarch64/inst.isle line 1520.
+    let expr0_0: Type = I64;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::MovFromVec {
+        rd: expr1_0,
+        rn: pattern0_0,
+        idx: pattern1_0,
+        size: pattern2_0.clone(),
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
+// Generated as internal constructor for term mov_from_vec_signed.
+pub fn constructor_mov_from_vec_signed<C: Context>(
+    ctx: &mut C,
+    arg0: Reg,
+    arg1: u8,
+    arg2: &VectorSize,
+    arg3: &OperandSize,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    let pattern3_0 = arg3;
+    // Rule at src/isa/aarch64/inst.isle line 1527.
+    let expr0_0: Type = I64;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::MovFromVecSigned {
+        rd: expr1_0,
+        rn: pattern0_0,
+        idx: pattern1_0,
+        size: pattern2_0.clone(),
+        scalar_size: pattern3_0.clone(),
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
+// Generated as internal constructor for term extend.
+pub fn constructor_extend<C: Context>(
+    ctx: &mut C,
+    arg0: Reg,
+    arg1: bool,
+    arg2: u8,
+    arg3: u8,
+) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    let pattern2_0 = arg2;
+    let pattern3_0 = arg3;
+    // Rule at src/isa/aarch64/inst.isle line 1534.
+    let expr0_0: Type = I64;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::Extend {
+        rd: expr1_0,
+        rn: pattern0_0,
+        signed: pattern1_0,
+        from_bits: pattern2_0,
+        to_bits: pattern3_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
+// Generated as internal constructor for term load_acquire.
+pub fn constructor_load_acquire<C: Context>(ctx: &mut C, arg0: Type, arg1: Reg) -> Option<Reg> {
+    let pattern0_0 = arg0;
+    let pattern1_0 = arg1;
+    // Rule at src/isa/aarch64/inst.isle line 1541.
+    let expr0_0: Type = I64;
+    let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
+    let expr2_0 = MInst::LoadAcquire {
+        access_ty: pattern0_0,
+        rt: expr1_0,
+        rn: pattern1_0,
+    };
+    let expr3_0 = C::emit(ctx, &expr2_0);
+    let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0);
+    return Some(expr4_0);
+}
+
 // Generated as internal constructor for term imm.
 pub fn constructor_imm<C: Context>(ctx: &mut C, arg0: Type, arg1: u64) -> Option<Reg> {
     let pattern0_0 = arg0;
     if let Some(pattern1_0) = C::integral_ty(ctx, pattern0_0) {
         let pattern2_0 = arg1;
         if let Some(pattern3_0) = C::imm_logic_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1531.
+            // Rule at src/isa/aarch64/inst.isle line 1559.
             let expr0_0 = ALUOp::Orr64;
             let expr1_0 = C::zero_reg(ctx);
             let expr2_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, expr1_0, pattern3_0)?;
             return Some(expr2_0);
         }
         if let Some(pattern3_0) = C::move_wide_const_from_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1523.
+            // Rule at src/isa/aarch64/inst.isle line 1551.
             let expr0_0 = OperandSize::Size64;
             let expr1_0 = constructor_movz(ctx, pattern3_0, &expr0_0)?;
             return Some(expr1_0);
         }
         if let Some(pattern3_0) = C::move_wide_const_from_negated_u64(ctx, pattern2_0) {
-            // Rule at src/isa/aarch64/inst.isle line 1527.
+            // Rule at src/isa/aarch64/inst.isle line 1555.
             let expr0_0 = OperandSize::Size64;
             let expr1_0 = constructor_movn(ctx, pattern3_0, &expr0_0)?;
             return Some(expr1_0);
         }
-        // Rule at src/isa/aarch64/inst.isle line 1538.
+        // Rule at src/isa/aarch64/inst.isle line 1566.
         let expr0_0 = C::load_constant64_full(ctx, pattern2_0);
         return Some(expr0_0);
     }
@@ -1634,28 +1731,18 @@ pub fn constructor_put_in_reg_sext64<C: Context>(ctx: &mut C, arg0: Value) -> Op
     let pattern0_0 = arg0;
     let pattern1_0 = C::value_type(ctx, pattern0_0);
     if pattern1_0 == I64 {
-        // Rule at src/isa/aarch64/inst.isle line 1552.
+        // Rule at src/isa/aarch64/inst.isle line 1577.
         let expr0_0 = C::put_in_reg(ctx, pattern0_0);
         return Some(expr0_0);
     }
     if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) {
-        // Rule at src/isa/aarch64/inst.isle line 1545.
-        let expr0_0: Type = I32;
-        let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
-        let expr2_0 = C::put_in_reg(ctx, pattern0_0);
-        let expr3_0: bool = true;
-        let expr4_0 = C::ty_bits(ctx, pattern2_0);
-        let expr5_0: u8 = 64;
-        let expr6_0 = MInst::Extend {
-            rd: expr1_0,
-            rn: expr2_0,
-            signed: expr3_0,
-            from_bits: expr4_0,
-            to_bits: expr5_0,
-        };
-        let expr7_0 = C::emit(ctx, &expr6_0);
-        let expr8_0 = C::writable_reg_to_reg(ctx, expr1_0);
-        return Some(expr8_0);
+        // Rule at src/isa/aarch64/inst.isle line 1573.
+        let expr0_0 = C::put_in_reg(ctx, pattern0_0);
+        let expr1_0: bool = true;
+        let expr2_0 = C::ty_bits(ctx, pattern2_0);
+        let expr3_0: u8 = 64;
+        let expr4_0 = constructor_extend(ctx, expr0_0, expr1_0, expr2_0, expr3_0)?;
+        return Some(expr4_0);
     }
     return None;
 }
@@ -1665,28 +1752,18 @@ pub fn constructor_put_in_reg_zext64<C: Context>(ctx: &mut C, arg0: Value) -> Op
     let pattern0_0 = arg0;
     let pattern1_0 = C::value_type(ctx, pattern0_0);
     if pattern1_0 == I64 {
-        // Rule at src/isa/aarch64/inst.isle line 1563.
+        // Rule at src/isa/aarch64/inst.isle line 1585.
         let expr0_0 = C::put_in_reg(ctx, pattern0_0);
         return Some(expr0_0);
     }
     if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) {
-        // Rule at src/isa/aarch64/inst.isle line 1556.
-        let expr0_0: Type = I32;
-        let expr1_0 = C::temp_writable_reg(ctx, expr0_0);
-        let expr2_0 = C::put_in_reg(ctx, pattern0_0);
-        let expr3_0: bool = false;
-        let expr4_0 = C::ty_bits(ctx, pattern2_0);
-        let expr5_0: u8 = 64;
-        let expr6_0 = MInst::Extend {
-            rd: expr1_0,
-            rn: expr2_0,
-            signed: expr3_0,
-            from_bits: expr4_0,
-            to_bits: expr5_0,
-        };
-        let expr7_0 = C::emit(ctx, &expr6_0);
-        let expr8_0 = C::writable_reg_to_reg(ctx, expr1_0);
-        return Some(expr8_0);
+        // Rule at src/isa/aarch64/inst.isle line 1581.
+        let expr0_0 = C::put_in_reg(ctx, pattern0_0);
+        let expr1_0: bool = false;
+        let expr2_0 = C::ty_bits(ctx, pattern2_0);
+        let expr3_0: u8 = 64;
+        let expr4_0 = constructor_extend(ctx, expr0_0, expr1_0, expr2_0, expr3_0)?;
+        return Some(expr4_0);
     }
     return None;
 }
@@ -1694,7 +1771,7 @@ pub fn constructor_put_in_reg_zext64<C: Context>(ctx: &mut C, arg0: Value) -> Op
 // Generated as internal constructor for term trap_if_zero_divisor.
 pub fn constructor_trap_if_zero_divisor<C: Context>(ctx: &mut C, arg0: Reg) -> Option<Reg> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/aarch64/inst.isle line 1568.
+    // Rule at src/isa/aarch64/inst.isle line 1590.
     let expr0_0 = C::cond_br_zero(ctx, pattern0_0);
     let expr1_0 = C::trap_code_division_by_zero(ctx);
     let expr2_0 = MInst::TrapIf {
@@ -1709,12 +1786,12 @@ pub fn constructor_size_from_ty<C: Context>(ctx: &mut C, arg0: Type) -> Option<OperandSize> {
     let pattern0_0 = arg0;
     if pattern0_0 == I64 {
-        // Rule at src/isa/aarch64/inst.isle line 1574.
+        // Rule at src/isa/aarch64/inst.isle line 1596.
         let expr0_0 = OperandSize::Size64;
         return Some(expr0_0);
     }
     if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) {
-        // Rule at src/isa/aarch64/inst.isle line 1573.
+        // Rule at src/isa/aarch64/inst.isle line 1595.
         let expr0_0 = OperandSize::Size32;
         return Some(expr0_0);
     }
@@ -1731,7 +1808,7 @@ pub fn constructor_trap_if_div_overflow<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/aarch64/inst.isle line 1580.
+    // Rule at src/isa/aarch64/inst.isle line 1602.
     let expr0_0 = constructor_adds_op(ctx, pattern0_0)?;
     let expr1_0 = C::writable_zero_reg(ctx);
     let expr2_0: u8 = 1;
@@ -1775,12 +1852,12 @@ pub fn constructor_adds_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<ALUOp> {
     let pattern0_0 = arg0;
     if pattern0_0 == I64 {
-        // Rule at src/isa/aarch64/inst.isle line 1600.
+        // Rule at src/isa/aarch64/inst.isle line 1622.
         let expr0_0 = ALUOp::AddS64;
         return Some(expr0_0);
     }
     if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) {
-        // Rule at src/isa/aarch64/inst.isle line 1599.
+        // Rule at src/isa/aarch64/inst.isle line 1621.
         let expr0_0 = ALUOp::AddS32;
         return Some(expr0_0);
     }
@@ -1826,78 +1903,196 @@ pub fn constructor_lower<C: Context>(ctx: &mut C, arg0: Inst) -> Option<ValueRegs> {
-                    let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1);
-                    // Rule at src/isa/aarch64/lower.isle line 94.
-                    let expr0_0 = C::put_in_regs(ctx, pattern7_0);
-                    let expr1_0: usize = 0;
-                    let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);
-                    let expr3_0: usize = 1;
-                    let expr4_0 = C::value_regs_get(ctx, expr0_0, expr3_0);
-                    let expr5_0 = C::put_in_regs(ctx, pattern7_1);
-                    let expr6_0: usize = 0;
-                    let expr7_0 = C::value_regs_get(ctx, expr5_0, expr6_0);
-                    let expr8_0: usize = 1;
-                    let expr9_0 = C::value_regs_get(ctx, expr5_0, expr8_0);
-                    let expr10_0 = constructor_add64_with_flags(ctx, expr2_0, expr7_0)?;
-                    let expr11_0 = constructor_adc64(ctx, expr4_0, expr9_0)?;
-                    let expr12_0 = constructor_with_flags(ctx, &expr10_0, &expr11_0)?;
-                    return Some(expr12_0);
+            match &pattern4_0 {
+                &InstructionData::Binary {
+                    opcode: ref pattern5_0,
+                    args: ref pattern5_1,
+                } => {
+                    match &pattern5_0 {
+                        &Opcode::Iadd => {
+                            let (pattern7_0, pattern7_1) =
+                                C::unpack_value_array_2(ctx, &pattern5_1);
+                            // Rule at src/isa/aarch64/lower.isle line 94.
+ let expr0_0 = C::put_in_regs(ctx, pattern7_0); + let expr1_0: usize = 0; + let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); + let expr3_0: usize = 1; + let expr4_0 = C::value_regs_get(ctx, expr0_0, expr3_0); + let expr5_0 = C::put_in_regs(ctx, pattern7_1); + let expr6_0: usize = 0; + let expr7_0 = C::value_regs_get(ctx, expr5_0, expr6_0); + let expr8_0: usize = 1; + let expr9_0 = C::value_regs_get(ctx, expr5_0, expr8_0); + let expr10_0 = constructor_add64_with_flags(ctx, expr2_0, expr7_0)?; + let expr11_0 = constructor_adc64(ctx, expr4_0, expr9_0)?; + let expr12_0 = constructor_with_flags(ctx, &expr10_0, &expr11_0)?; + return Some(expr12_0); + } + &Opcode::Isub => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/aarch64/lower.isle line 145. + let expr0_0 = C::put_in_regs(ctx, pattern7_0); + let expr1_0: usize = 0; + let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); + let expr3_0: usize = 1; + let expr4_0 = C::value_regs_get(ctx, expr0_0, expr3_0); + let expr5_0 = C::put_in_regs(ctx, pattern7_1); + let expr6_0: usize = 0; + let expr7_0 = C::value_regs_get(ctx, expr5_0, expr6_0); + let expr8_0: usize = 1; + let expr9_0 = C::value_regs_get(ctx, expr5_0, expr8_0); + let expr10_0 = constructor_sub64_with_flags(ctx, expr2_0, expr7_0)?; + let expr11_0 = constructor_sbc64(ctx, expr4_0, expr9_0)?; + let expr12_0 = constructor_with_flags(ctx, &expr10_0, &expr11_0)?; + return Some(expr12_0); + } + &Opcode::Imul => { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/aarch64/lower.isle line 200. + let expr0_0 = C::put_in_regs(ctx, pattern7_0); + let expr1_0: usize = 0; + let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); + let expr3_0: usize = 1; + let expr4_0 = C::value_regs_get(ctx, expr0_0, expr3_0); + let expr5_0 = C::put_in_regs(ctx, pattern7_1); + let expr6_0: usize = 0; + let expr7_0 = C::value_regs_get(ctx, expr5_0, expr6_0); + let expr8_0: usize = 1; + let expr9_0 = C::value_regs_get(ctx, expr5_0, expr8_0); + let expr10_0 = ALUOp::UMulH; + let expr11_0 = constructor_alu_rrr(ctx, &expr10_0, expr2_0, expr7_0)?; + let expr12_0 = ALUOp3::MAdd64; + let expr13_0 = + constructor_alu_rrrr(ctx, &expr12_0, expr2_0, expr9_0, expr11_0)?; + let expr14_0 = ALUOp3::MAdd64; + let expr15_0 = + constructor_alu_rrrr(ctx, &expr14_0, expr4_0, expr7_0, expr13_0)?; + let expr16_0 = ALUOp3::MAdd64; + let expr17_0 = C::zero_reg(ctx); + let expr18_0 = + constructor_alu_rrrr(ctx, &expr16_0, expr2_0, expr7_0, expr17_0)?; + let expr19_0 = C::value_regs(ctx, expr18_0, expr15_0); + return Some(expr19_0); + } + _ => {} } - &Opcode::Isub => { - let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/aarch64/lower.isle line 145. 
- let expr0_0 = C::put_in_regs(ctx, pattern7_0); - let expr1_0: usize = 0; - let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); - let expr3_0: usize = 1; - let expr4_0 = C::value_regs_get(ctx, expr0_0, expr3_0); - let expr5_0 = C::put_in_regs(ctx, pattern7_1); - let expr6_0: usize = 0; - let expr7_0 = C::value_regs_get(ctx, expr5_0, expr6_0); - let expr8_0: usize = 1; - let expr9_0 = C::value_regs_get(ctx, expr5_0, expr8_0); - let expr10_0 = constructor_sub64_with_flags(ctx, expr2_0, expr7_0)?; - let expr11_0 = constructor_sbc64(ctx, expr4_0, expr9_0)?; - let expr12_0 = constructor_with_flags(ctx, &expr10_0, &expr11_0)?; - return Some(expr12_0); - } - &Opcode::Imul => { - let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); - // Rule at src/isa/aarch64/lower.isle line 200. - let expr0_0 = C::put_in_regs(ctx, pattern7_0); - let expr1_0: usize = 0; - let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); - let expr3_0: usize = 1; - let expr4_0 = C::value_regs_get(ctx, expr0_0, expr3_0); - let expr5_0 = C::put_in_regs(ctx, pattern7_1); - let expr6_0: usize = 0; - let expr7_0 = C::value_regs_get(ctx, expr5_0, expr6_0); - let expr8_0: usize = 1; - let expr9_0 = C::value_regs_get(ctx, expr5_0, expr8_0); - let expr10_0 = ALUOp::UMulH; - let expr11_0 = constructor_alu_rrr(ctx, &expr10_0, expr2_0, expr7_0)?; - let expr12_0 = ALUOp3::MAdd64; - let expr13_0 = - constructor_alu_rrrr(ctx, &expr12_0, expr2_0, expr9_0, expr11_0)?; - let expr14_0 = ALUOp3::MAdd64; - let expr15_0 = - constructor_alu_rrrr(ctx, &expr14_0, expr4_0, expr7_0, expr13_0)?; - let expr16_0 = ALUOp3::MAdd64; - let expr17_0 = C::zero_reg(ctx); - let expr18_0 = - constructor_alu_rrrr(ctx, &expr16_0, expr2_0, expr7_0, expr17_0)?; - let expr19_0 = C::value_regs(ctx, expr18_0, expr15_0); - return Some(expr19_0); - } - _ => {} } + &InstructionData::Unary { + opcode: ref pattern5_0, + arg: pattern5_1, + } => { + match &pattern5_0 { + &Opcode::Uextend => { + if let Some(pattern7_0) = C::def_inst(ctx, pattern5_1) { + let pattern8_0 = C::inst_data(ctx, pattern7_0); + if let &InstructionData::BinaryImm8 { + opcode: ref pattern9_0, + arg: pattern9_1, + imm: pattern9_2, + } = &pattern8_0 + { + if let &Opcode::Extractlane = &pattern9_0 { + let pattern11_0 = C::value_type(ctx, pattern9_1); + let pattern12_0 = C::u8_from_uimm8(ctx, pattern9_2); + // Rule at src/isa/aarch64/lower.isle line 533. + let expr0_0 = C::put_in_reg(ctx, pattern9_1); + let expr1_0 = constructor_vector_size(ctx, pattern11_0)?; + let expr2_0 = constructor_mov_from_vec( + ctx, + expr0_0, + pattern12_0, + &expr1_0, + )?; + let expr3_0: Type = I64; + let expr4_0: u64 = 0; + let expr5_0 = constructor_imm(ctx, expr3_0, expr4_0)?; + let expr6_0 = C::value_regs(ctx, expr2_0, expr5_0); + return Some(expr6_0); + } + } + } + // Rule at src/isa/aarch64/lower.isle line 528. 
+ let expr0_0 = constructor_put_in_reg_zext64(ctx, pattern5_1)?; + let expr1_0: Type = I64; + let expr2_0: u64 = 0; + let expr3_0 = constructor_imm(ctx, expr1_0, expr2_0)?; + let expr4_0 = C::value_regs(ctx, expr0_0, expr3_0); + return Some(expr4_0); + } + &Opcode::Sextend => { + if let Some(pattern7_0) = C::def_inst(ctx, pattern5_1) { + let pattern8_0 = C::inst_data(ctx, pattern7_0); + if let &InstructionData::BinaryImm8 { + opcode: ref pattern9_0, + arg: pattern9_1, + imm: pattern9_2, + } = &pattern8_0 + { + if let &Opcode::Extractlane = &pattern9_0 { + let pattern11_0 = C::value_type(ctx, pattern9_1); + if pattern11_0 == I64X2 { + let pattern13_0 = C::u8_from_uimm8(ctx, pattern9_2); + // Rule at src/isa/aarch64/lower.isle line 581. + let expr0_0 = C::put_in_reg(ctx, pattern9_1); + let expr1_0 = VectorSize::Size64x2; + let expr2_0 = constructor_mov_from_vec( + ctx, + expr0_0, + pattern13_0, + &expr1_0, + )?; + let expr3_0 = ALUOp::Asr64; + let expr4_0: u8 = 63; + let expr5_0 = C::imm_shift_from_u8(ctx, expr4_0); + let expr6_0 = constructor_alu_rr_imm_shift( + ctx, &expr3_0, expr2_0, expr5_0, + )?; + let expr7_0 = C::value_regs(ctx, expr2_0, expr6_0); + return Some(expr7_0); + } + if let Some(()) = C::not_i64x2(ctx, pattern11_0) { + let pattern13_0 = C::u8_from_uimm8(ctx, pattern9_2); + // Rule at src/isa/aarch64/lower.isle line 568. + let expr0_0 = C::put_in_reg(ctx, pattern9_1); + let expr1_0 = + constructor_vector_size(ctx, pattern11_0)?; + let expr2_0: Type = I64; + let expr3_0 = constructor_size_from_ty(ctx, expr2_0)?; + let expr4_0 = constructor_mov_from_vec_signed( + ctx, + expr0_0, + pattern13_0, + &expr1_0, + &expr3_0, + )?; + let expr5_0 = ALUOp::Asr64; + let expr6_0: u8 = 63; + let expr7_0 = C::imm_shift_from_u8(ctx, expr6_0); + let expr8_0 = constructor_alu_rr_imm_shift( + ctx, &expr5_0, expr4_0, expr7_0, + )?; + let expr9_0 = C::value_regs(ctx, expr4_0, expr8_0); + return Some(expr9_0); + } + } + } + } + // Rule at src/isa/aarch64/lower.isle line 556. + let expr0_0 = constructor_put_in_reg_sext64(ctx, pattern5_1)?; + let expr1_0 = ALUOp::Asr64; + let expr2_0: u8 = 63; + let expr3_0 = C::imm_shift_from_u8(ctx, expr2_0); + let expr4_0 = + constructor_alu_rr_imm_shift(ctx, &expr1_0, expr0_0, expr3_0)?; + let expr5_0 = C::value_regs(ctx, expr0_0, expr4_0); + return Some(expr5_0); + } + _ => {} + } + } + _ => {} } } if pattern2_0 == I16X8 { @@ -3035,14 +3230,100 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { - if let &Opcode::Ineg = &pattern5_0 { - // Rule at src/isa/aarch64/lower.isle line 186. - let expr0_0 = constructor_isub_op(ctx, pattern3_0)?; - let expr1_0 = C::zero_reg(ctx); - let expr2_0 = C::put_in_reg(ctx, pattern5_1); - let expr3_0 = constructor_alu_rrr(ctx, &expr0_0, expr1_0, expr2_0)?; - let expr4_0 = C::value_reg(ctx, expr3_0); - return Some(expr4_0); + match &pattern5_0 { + &Opcode::Ineg => { + // Rule at src/isa/aarch64/lower.isle line 186. 
+ let expr0_0 = constructor_isub_op(ctx, pattern3_0)?; + let expr1_0 = C::zero_reg(ctx); + let expr2_0 = C::put_in_reg(ctx, pattern5_1); + let expr3_0 = constructor_alu_rrr(ctx, &expr0_0, expr1_0, expr2_0)?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); + } + &Opcode::Uextend => { + if let Some(pattern7_0) = C::def_inst(ctx, pattern5_1) { + let pattern8_0 = C::inst_data(ctx, pattern7_0); + if let &InstructionData::BinaryImm8 { + opcode: ref pattern9_0, + arg: pattern9_1, + imm: pattern9_2, + } = &pattern8_0 + { + if let &Opcode::Extractlane = &pattern9_0 { + let pattern11_0 = C::value_type(ctx, pattern9_1); + let pattern12_0 = C::u8_from_uimm8(ctx, pattern9_2); + // Rule at src/isa/aarch64/lower.isle line 515. + let expr0_0 = C::put_in_reg(ctx, pattern9_1); + let expr1_0 = constructor_vector_size(ctx, pattern11_0)?; + let expr2_0 = constructor_mov_from_vec( + ctx, + expr0_0, + pattern12_0, + &expr1_0, + )?; + let expr3_0 = C::value_reg(ctx, expr2_0); + return Some(expr3_0); + } + } + } + let pattern7_0 = C::value_type(ctx, pattern5_1); + if let Some(pattern8_0) = C::sinkable_atomic_load(ctx, pattern5_1) { + // Rule at src/isa/aarch64/lower.isle line 522. + let expr0_0 = C::sink_atomic_load(ctx, &pattern8_0); + let expr1_0 = constructor_load_acquire(ctx, pattern7_0, expr0_0)?; + let expr2_0 = C::value_reg(ctx, expr1_0); + return Some(expr2_0); + } + // Rule at src/isa/aarch64/lower.isle line 510. + let expr0_0 = C::put_in_reg(ctx, pattern5_1); + let expr1_0: bool = false; + let expr2_0 = C::ty_bits(ctx, pattern7_0); + let expr3_0 = C::ty_bits(ctx, pattern3_0); + let expr4_0 = + constructor_extend(ctx, expr0_0, expr1_0, expr2_0, expr3_0)?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + &Opcode::Sextend => { + if let Some(pattern7_0) = C::def_inst(ctx, pattern5_1) { + let pattern8_0 = C::inst_data(ctx, pattern7_0); + if let &InstructionData::BinaryImm8 { + opcode: ref pattern9_0, + arg: pattern9_1, + imm: pattern9_2, + } = &pattern8_0 + { + if let &Opcode::Extractlane = &pattern9_0 { + let pattern11_0 = C::value_type(ctx, pattern9_1); + let pattern12_0 = C::u8_from_uimm8(ctx, pattern9_2); + // Rule at src/isa/aarch64/lower.isle line 547. + let expr0_0 = C::put_in_reg(ctx, pattern9_1); + let expr1_0 = constructor_vector_size(ctx, pattern11_0)?; + let expr2_0 = constructor_size_from_ty(ctx, pattern3_0)?; + let expr3_0 = constructor_mov_from_vec_signed( + ctx, + expr0_0, + pattern12_0, + &expr1_0, + &expr2_0, + )?; + let expr4_0 = C::value_reg(ctx, expr3_0); + return Some(expr4_0); + } + } + } + let pattern7_0 = C::value_type(ctx, pattern5_1); + // Rule at src/isa/aarch64/lower.isle line 542. 
+                            let expr0_0 = C::put_in_reg(ctx, pattern5_1);
+                            let expr1_0: bool = true;
+                            let expr2_0 = C::ty_bits(ctx, pattern7_0);
+                            let expr3_0 = C::ty_bits(ctx, pattern3_0);
+                            let expr4_0 =
+                                constructor_extend(ctx, expr0_0, expr1_0, expr2_0, expr3_0)?;
+                            let expr5_0 = C::value_reg(ctx, expr4_0);
+                            return Some(expr5_0);
+                        }
+                        _ => {}
                     }
                 }
                 _ => {}
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index e097836b85..43a7167a93 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -77,112 +77,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
         Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => implemented_in_isle(ctx),
 
-        Opcode::Uextend | Opcode::Sextend => {
-            let output_ty = ty.unwrap();
-
-            if output_ty.is_vector() {
-                return Err(CodegenError::Unsupported(format!(
-                    "{}: Unsupported type: {:?}",
-                    op, output_ty
-                )));
-            }
-
-            if op == Opcode::Uextend {
-                let inputs = ctx.get_input_as_source_or_const(inputs[0].insn, inputs[0].input);
-                if let Some((atomic_load, 0)) = inputs.inst {
-                    if ctx.data(atomic_load).opcode() == Opcode::AtomicLoad {
-                        let output_ty = ty.unwrap();
-                        assert!(output_ty == I32 || output_ty == I64);
-                        let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                        emit_atomic_load(ctx, rt, atomic_load);
-                        ctx.sink_inst(atomic_load);
-                        return Ok(());
-                    }
-                }
-            }
-
-            let input_ty = ctx.input_ty(insn, 0);
-            let from_bits = ty_bits(input_ty) as u8;
-            let to_bits = ty_bits(output_ty) as u8;
-            let to_bits = std::cmp::max(32, to_bits);
-            assert!(from_bits <= to_bits);
-
-            let signed = op == Opcode::Sextend;
-            let dst = get_output_reg(ctx, outputs[0]);
-            let src =
-                if let Some(extract_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Extractlane) {
-                    put_input_in_regs(
-                        ctx,
-                        InsnInput {
-                            insn: extract_insn,
-                            input: 0,
-                        },
-                    )
-                } else {
-                    put_input_in_regs(ctx, inputs[0])
-                };
-
-            let needs_extend = from_bits < to_bits && to_bits <= 64;
-            // For i128, we want to extend the lower half, except if it is already 64 bits.
-            let needs_lower_extend = to_bits > 64 && from_bits < 64;
-            let pass_through_lower = to_bits > 64 && !needs_lower_extend;
-
-            if needs_extend || needs_lower_extend {
-                let rn = src.regs()[0];
-                let rd = dst.regs()[0];
-
-                if let Some(extract_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Extractlane) {
-                    let idx =
-                        if let InstructionData::BinaryImm8 { imm, .. } = ctx.data(extract_insn) {
-                            *imm
-                        } else {
-                            unreachable!();
-                        };
-
-                    let size = VectorSize::from_ty(ctx.input_ty(extract_insn, 0));
-
-                    if signed {
-                        let scalar_size = OperandSize::from_ty(output_ty);
-
-                        ctx.emit(Inst::MovFromVecSigned {
-                            rd,
-                            rn,
-                            idx,
-                            size,
-                            scalar_size,
-                        });
-                    } else {
-                        ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
-                    }
-                } else {
-                    // If we reach this point, we weren't able to incorporate the extend as
-                    // a register-mode on another instruction, so we have a 'None'
-                    // narrow-value/extend mode here, and we emit the explicit instruction.
-                    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                    ctx.emit(Inst::Extend {
-                        rd,
-                        rn,
-                        signed,
-                        from_bits,
-                        to_bits: std::cmp::min(64, to_bits),
-                    });
-                }
-            } else if pass_through_lower {
-                ctx.emit(Inst::gen_move(dst.regs()[0], src.regs()[0], I64));
-            }
-
-            if output_ty == I128 {
-                if signed {
-                    ctx.emit(Inst::AluRRImmShift {
-                        alu_op: ALUOp::Asr64,
-                        rd: dst.regs()[1],
-                        rn: dst.regs()[0].to_reg(),
-                        immshift: ImmShift::maybe_from_u64(63).unwrap(),
-                    });
-                } else {
-                    lower_constant_u64(ctx, dst.regs()[1], 0);
-                }
-            }
-        }
+        Opcode::Uextend | Opcode::Sextend => implemented_in_isle(ctx),
 
         Opcode::Bnot => {
             let out_regs = get_output_reg(ctx, outputs[0]);
@@ -1147,7 +1042,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
         Opcode::AtomicLoad => {
             let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            emit_atomic_load(ctx, rt, insn);
+            let inst = emit_atomic_load(ctx, rt, insn);
+            ctx.emit(inst);
         }
 
         Opcode::AtomicStore => {
diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs
index 0fb30d9530..2961b1487b 100644
--- a/cranelift/codegen/src/isa/x64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle.rs
@@ -9,7 +9,7 @@ use super::{
 };
 use crate::isa::x64::inst::args::SyntheticAmode;
 use crate::isa::x64::inst::regs;
-use crate::isa::x64::settings as x64_settings;
+use crate::isa::x64::settings::Flags;
 use crate::machinst::isle::*;
 use crate::{
     ir::{immediates::*, types::*, Inst, InstructionData, Opcode, TrapCode, Value, ValueList},
@@ -19,9 +19,8 @@ use crate::{
         },
         x64_map_regs,
     },
-    machinst::{get_output_reg, InsnInput, InsnOutput, LowerCtx, RegRenamer},
+    machinst::{InsnInput, InsnOutput, LowerCtx},
 };
-use smallvec::SmallVec;
 use std::convert::TryFrom;
 
 pub struct SinkableLoad {
@@ -33,78 +32,24 @@
 /// The main entry point for lowering with ISLE.
 pub(crate) fn lower<C>(
     lower_ctx: &mut C,
-    isa_flags: &x64_settings::Flags,
+    isa_flags: &Flags,
     outputs: &[InsnOutput],
     inst: Inst,
 ) -> Result<(), ()>
 where
     C: LowerCtx<I = MInst>,
 {
-    // TODO: reuse the ISLE context across lowerings so we can reuse its
-    // internal heap allocations.
-    let mut isle_ctx = IsleContext::new(lower_ctx, isa_flags);
-
-    let temp_regs = generated_code::constructor_lower(&mut isle_ctx, inst).ok_or(())?;
-    let mut temp_regs = temp_regs.regs().iter();
-
-    #[cfg(debug_assertions)]
-    {
-        let all_dsts_len = outputs
-            .iter()
-            .map(|out| get_output_reg(isle_ctx.lower_ctx, *out).len())
-            .sum();
-        debug_assert_eq!(
-            temp_regs.len(),
-            all_dsts_len,
-            "the number of temporary registers and destination registers do \
-             not match ({} != {}); ensure the correct registers are being \
-             returned.",
-            temp_regs.len(),
-            all_dsts_len,
-        );
-    }
-
-    // The ISLE generated code emits its own registers to define the
-    // instruction's lowered values in. We rename those registers to the
-    // registers they were assigned when their value was used as an operand in
-    // earlier lowerings.
-    let mut renamer = RegRenamer::default();
-    for output in outputs {
-        let dsts = get_output_reg(isle_ctx.lower_ctx, *output);
-        for (temp, dst) in temp_regs.by_ref().zip(dsts.regs()) {
-            renamer.add_rename(*temp, dst.to_reg());
-        }
-    }
-
-    for mut inst in isle_ctx.into_emitted_insts() {
-        x64_map_regs(&mut inst, &renamer);
-        lower_ctx.emit(inst);
-    }
-
-    Ok(())
+    lower_common(
+        lower_ctx,
+        isa_flags,
+        outputs,
+        inst,
+        |cx, insn| generated_code::constructor_lower(cx, insn),
+        x64_map_regs,
+    )
 }
 
-pub struct IsleContext<'a, C> {
-    lower_ctx: &'a mut C,
-    isa_flags: &'a x64_settings::Flags,
-    emitted_insts: SmallVec<[MInst; 6]>,
-}
-
-impl<'a, C> IsleContext<'a, C> {
-    pub fn new(lower_ctx: &'a mut C, isa_flags: &'a x64_settings::Flags) -> Self {
-        IsleContext {
-            lower_ctx,
-            isa_flags,
-            emitted_insts: SmallVec::new(),
-        }
-    }
-
-    pub fn into_emitted_insts(self) -> SmallVec<[MInst; 6]> {
-        self.emitted_insts
-    }
-}
-
-impl<'a, C> generated_code::Context for IsleContext<'a, C>
+impl<C> generated_code::Context for IsleContext<'_, C, Flags, 6>
 where
     C: LowerCtx<I = MInst>,
 {
diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs
index c8d702a062..7a7ddcf6f2 100644
--- a/cranelift/codegen/src/machinst/isle.rs
+++ b/cranelift/codegen/src/machinst/isle.rs
@@ -1,5 +1,7 @@
-use crate::ir::Value;
+use crate::ir::{Inst, Value};
+use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, MachInst, RegRenamer};
 use regalloc::{Reg, Writable};
+use smallvec::SmallVec;
 
 pub type Unit = ();
 pub type ValueSlice<'a> = &'a [Value];
 pub type ValueArray2 = [Value; 2];
 pub type ValueArray3 = [Value; 3];
 pub type WritableReg = Writable<Reg>;
 pub type ValueRegs = crate::machinst::ValueRegs<Reg>;
 
+/// Helper macro to define methods in `prelude.isle` within `impl Context for
+/// ...` for each backend. These methods are shared amongst all backends.
 #[macro_export]
 #[doc(hidden)]
 macro_rules! isle_prelude_methods {
@@ -228,6 +232,102 @@ macro_rules! isle_prelude_methods {
     };
 }
 
+/// This structure is used to implement the ISLE-generated `Context` trait and
+/// internally has a temporary reference to a machinst `LowerCtx`.
+pub(crate) struct IsleContext<'a, C: LowerCtx, F, const N: usize>
+where
+    [C::I; N]: smallvec::Array<Item = C::I>,
+{
+    pub lower_ctx: &'a mut C,
+    pub isa_flags: &'a F,
+    pub emitted_insts: SmallVec<[C::I; N]>,
+}
+
+/// Shared lowering code amongst all backends for doing ISLE-based lowering.
+///
+/// The `isle_lower` argument here is an ISLE-generated function for `lower` and
+/// then this function otherwise handles register mapping and such around the
+/// lowering.
+pub(crate) fn lower_common<C, F, const N: usize>(
+    lower_ctx: &mut C,
+    isa_flags: &F,
+    outputs: &[InsnOutput],
+    inst: Inst,
+    isle_lower: fn(&mut IsleContext<'_, C, F, N>, Inst) -> Option<ValueRegs>,
+    map_regs: fn(&mut C::I, &RegRenamer),
+) -> Result<(), ()>
+where
+    C: LowerCtx,
+    [C::I; N]: smallvec::Array<Item = C::I>,
+{
+    // TODO: reuse the ISLE context across lowerings so we can reuse its
+    // internal heap allocations.
+    let mut isle_ctx = IsleContext {
+        lower_ctx,
+        isa_flags,
+        emitted_insts: SmallVec::new(),
+    };
+
+    let temp_regs = isle_lower(&mut isle_ctx, inst).ok_or(())?;
+    let mut temp_regs = temp_regs.regs().iter();
+
+    #[cfg(debug_assertions)]
+    {
+        let all_dsts_len = outputs
+            .iter()
+            .map(|out| get_output_reg(isle_ctx.lower_ctx, *out).len())
+            .sum();
+        debug_assert_eq!(
+            temp_regs.len(),
+            all_dsts_len,
+            "the number of temporary registers and destination registers do \
+             not match ({} != {}); ensure the correct registers are being \
+             returned.",
+            temp_regs.len(),
+            all_dsts_len,
+        );
+    }
+
+    // The ISLE generated code emits its own registers to define the
+    // instruction's lowered values in. We rename those registers to the
+    // registers they were assigned when their value was used as an operand in
+    // earlier lowerings.
+    let mut renamer = RegRenamer::default();
+    for output in outputs {
+        let dsts = get_output_reg(isle_ctx.lower_ctx, *output);
+        let ty = isle_ctx.lower_ctx.output_ty(output.insn, output.output);
+        let (_, tys) = <C::I>::rc_for_type(ty).unwrap();
+        for ((temp, dst), ty) in temp_regs.by_ref().zip(dsts.regs()).zip(tys) {
+            renamer.add_rename(*temp, dst.to_reg(), *ty);
+        }
+    }
+    for inst in isle_ctx.emitted_insts.iter_mut() {
+        map_regs(inst, &renamer);
+    }
+
+    // If any renamed register wasn't actually defined in the ISLE-generated
+    // instructions, then what we're actually doing is "renaming" an input to a
+    // new name, which requires manually inserting a `mov` instruction. Note
+    // that this typically doesn't happen and is only here for cases where the
+    // input is sometimes passed through unmodified to the output, such as
+    // zero-extending a 64-bit input to a 128-bit output, which doesn't actually
+    // change the input and simply produces another zero'd register.
+    for (old, new, ty) in renamer.unmapped_defs() {
+        isle_ctx
+            .lower_ctx
+            .emit(<C::I>::gen_move(Writable::from_reg(new), old, ty));
+    }
+
+    // Once everything is remapped we forward all emitted instructions to the
+    // `lower_ctx`. Note that this happens after the synthetic mov's above in
+    // case any of these instructions use those movs.
+    for inst in isle_ctx.emitted_insts {
+        lower_ctx.emit(inst);
+    }
+
+    Ok(())
+}
+
 #[inline(never)]
 #[cold]
 pub fn out_of_line_panic(msg: &str) -> ! {
diff --git a/cranelift/codegen/src/machinst/regmapping.rs b/cranelift/codegen/src/machinst/regmapping.rs
index 0f132e47b4..4b51c426bd 100644
--- a/cranelift/codegen/src/machinst/regmapping.rs
+++ b/cranelift/codegen/src/machinst/regmapping.rs
@@ -1,5 +1,7 @@
+use crate::ir::Type;
 use regalloc::{Reg, RegUsageMapper, Writable};
 use smallvec::SmallVec;
+use std::cell::Cell;
 
 // Define our own register-mapping trait so we can do arbitrary register
 // renamings that are more free-form than what `regalloc` constrains us to with
@@ -48,36 +50,59 @@ where
     }
 }
 
-#[derive(Default)]
+#[derive(Debug, Default)]
 pub struct RegRenamer {
-    // Map of `(old, new)` register names. Use a `SmallVec` because we typically
-    // only have one or two renamings.
-    renames: SmallVec<[(Reg, Reg); 2]>,
+    // Map of `(old, new, used, ty)` register names. Use a `SmallVec` because
+    // we typically only have one or two renamings.
+    //
+    // The `used` flag indicates whether the mapping has been used for
+    // `get_def`; it is consulted later by `unmapped_defs` to know what
+    // moves need to be generated.
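+    //
+    // `Cell` is used for the flag because the `RegMapper` methods, including
+    // `get_def`, take only `&self`, so recording that a mapping was used
+    // requires interior mutability.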
+    renames: SmallVec<[(Reg, Reg, Cell<bool>, Type); 2]>,
 }
 
 impl RegRenamer {
-    pub fn add_rename(&mut self, old: Reg, new: Reg) {
-        self.renames.push((old, new));
+    /// Adds a new mapping which means that `old` reg should now be called
+    /// `new`. The type of `old` is `ty` as specified.
+    pub fn add_rename(&mut self, old: Reg, new: Reg, ty: Type) {
+        self.renames.push((old, new, Cell::new(false), ty));
     }
 
-    fn get_rename(&self, reg: Reg) -> Option<Reg> {
-        self.renames
-            .iter()
-            .find(|(old, _)| reg == *old)
-            .map(|(_, new)| *new)
+    fn get_rename(&self, reg: Reg, set_used_def: bool) -> Option<Reg> {
+        let (_, new, used_def, _) = self.renames.iter().find(|(old, _, _, _)| reg == *old)?;
+        used_def.set(used_def.get() || set_used_def);
+        Some(*new)
+    }
+
+    /// Returns the list of register mappings, with their type, which were not
+    /// actually mapped.
+    ///
+    /// This list is used because it means that the `old` name for the register
+    /// was never actually defined, so to correctly rename this register the
+    /// caller needs to move `old` into `new`.
+    ///
+    /// This yields tuples of `(old, new, ty)`.
+    pub fn unmapped_defs(&self) -> impl Iterator<Item = (Reg, Reg, Type)> + '_ {
+        self.renames.iter().filter_map(|(old, new, used_def, ty)| {
+            if used_def.get() {
+                None
+            } else {
+                Some((*old, *new, *ty))
+            }
+        })
+    }
 }
 
 impl RegMapper for RegRenamer {
     fn get_use(&self, reg: Reg) -> Option<Reg> {
-        self.get_rename(reg)
+        self.get_rename(reg, false)
     }
 
     fn get_def(&self, reg: Reg) -> Option<Reg> {
-        self.get_rename(reg)
+        self.get_rename(reg, true)
     }
 
     fn get_mod(&self, reg: Reg) -> Option<Reg> {
-        self.get_rename(reg)
+        self.get_rename(reg, false)
     }
 }
diff --git a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif
index f8e25d7814..c3910ccaab 100644
--- a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif
@@ -106,3 +106,211 @@ block0(v0: i8):
 ; check: sxtb x0, w0
 ; nextln: asr x1, x0, #63
 ; nextln: ret
+
+function %i8x16_uextend_i16(i8x16) -> i16 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 1
+    v2 = uextend.i16 v1
+    return v2
+}
+
+; check: umov w0, v0.b[1]
+; nextln: ret
+
+function %i8x16_uextend_i32(i8x16) -> i32 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 1
+    v2 = uextend.i32 v1
+    return v2
+}
+
+; check: umov w0, v0.b[1]
+; nextln: ret
+
+function %i8x16_uextend_i64(i8x16) -> i64 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 1
+    v2 = uextend.i64 v1
+    return v2
+}
+
+; check: umov w0, v0.b[1]
+; nextln: ret
+
+function %i8x16_uextend_i128(i8x16) -> i128 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 1
+    v2 = uextend.i128 v1
+    return v2
+}
+
+; check: umov w0, v0.b[1]
+; nextln: movz x1, #0
+; nextln: ret
+
+function %i8x16_sextend_i16(i8x16) -> i16 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 1
+    v2 = sextend.i16 v1
+    return v2
+}
+
+; check: smov w0, v0.b[1]
+; nextln: ret
+
+function %i8x16_sextend_i32(i8x16) -> i32 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 1
+    v2 = sextend.i32 v1
+    return v2
+}
+
+; check: smov w0, v0.b[1]
+; nextln: ret
+
+function %i8x16_sextend_i64(i8x16) -> i64 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 1
+    v2 = sextend.i64 v1
+    return v2
+}
+
+; check: smov x0, v0.b[1]
+; nextln: ret
+
+function %i8x16_sextend_i128(i8x16) -> i128 {
+block0(v0: i8x16):
+    v1 = extractlane v0, 1
+    v2 = sextend.i128 v1
+    return v2
+}
+
+; check: smov x0, v0.b[1]
+; nextln: asr x1, x0, #63
+; nextln: ret
+
+function %i16x8_uextend_i32(i16x8) -> i32 {
+block0(v0: i16x8):
+    v1 = extractlane v0, 1
+    v2 = 
uextend.i32 v1
+    return v2
+}
+
+; check: umov w0, v0.h[1]
+; nextln: ret
+
+function %i16x8_uextend_i64(i16x8) -> i64 {
+block0(v0: i16x8):
+    v1 = extractlane v0, 1
+    v2 = uextend.i64 v1
+    return v2
+}
+
+; check: umov w0, v0.h[1]
+; nextln: ret
+
+function %i16x8_uextend_i128(i16x8) -> i128 {
+block0(v0: i16x8):
+    v1 = extractlane v0, 1
+    v2 = uextend.i128 v1
+    return v2
+}
+
+; check: umov w0, v0.h[1]
+; nextln: movz x1, #0
+; nextln: ret
+
+function %i16x8_sextend_i32(i16x8) -> i32 {
+block0(v0: i16x8):
+    v1 = extractlane v0, 1
+    v2 = sextend.i32 v1
+    return v2
+}
+
+; check: smov w0, v0.h[1]
+; nextln: ret
+
+function %i16x8_sextend_i64(i16x8) -> i64 {
+block0(v0: i16x8):
+    v1 = extractlane v0, 1
+    v2 = sextend.i64 v1
+    return v2
+}
+
+; check: smov x0, v0.h[1]
+; nextln: ret
+
+function %i16x8_sextend_i128(i16x8) -> i128 {
+block0(v0: i16x8):
+    v1 = extractlane v0, 1
+    v2 = sextend.i128 v1
+    return v2
+}
+
+; check: smov x0, v0.h[1]
+; nextln: asr x1, x0, #63
+; nextln: ret
+
+function %i32x4_uextend_i64(i32x4) -> i64 {
+block0(v0: i32x4):
+    v1 = extractlane v0, 1
+    v2 = uextend.i64 v1
+    return v2
+}
+
+; check: mov w0, v0.s[1]
+; nextln: ret
+
+function %i32x4_uextend_i128(i32x4) -> i128 {
+block0(v0: i32x4):
+    v1 = extractlane v0, 1
+    v2 = uextend.i128 v1
+    return v2
+}
+
+; check: mov w0, v0.s[1]
+; nextln: movz x1, #0
+; nextln: ret
+
+function %i32x4_sextend_i64(i32x4) -> i64 {
+block0(v0: i32x4):
+    v1 = extractlane v0, 1
+    v2 = sextend.i64 v1
+    return v2
+}
+
+; check: smov x0, v0.s[1]
+; nextln: ret
+
+function %i32x4_sextend_i128(i32x4) -> i128 {
+block0(v0: i32x4):
+    v1 = extractlane v0, 1
+    v2 = sextend.i128 v1
+    return v2
+}
+
+; check: smov x0, v0.s[1]
+; nextln: asr x1, x0, #63
+; nextln: ret
+
+function %i64x2_uextend_i128(i64x2) -> i128 {
+block0(v0: i64x2):
+    v1 = extractlane v0, 1
+    v2 = uextend.i128 v1
+    return v2
+}
+
+; check: mov x0, v0.d[1]
+; nextln: movz x1, #0
+; nextln: ret
+
+function %i64x2_sextend_i128(i64x2) -> i128 {
+block0(v0: i64x2):
+    v1 = extractlane v0, 1
+    v2 = sextend.i128 v1
+    return v2
+}
+
+; check: mov x0, v0.d[1]
+; nextln: asr x1, x0, #63
+; nextln: ret
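Reviewer note: the subtlest part of this patch is the `used`-flag bookkeeping
shared by `RegRenamer` and `lower_common`. Below is a minimal, self-contained
sketch of that idea (illustrative only, with invented names and `u32` standing
in for `Reg`; it is not part of the patch): a def that consults a mapping marks
it as used, and the leftover, never-used mappings are exactly the pass-through
cases for which `lower_common` must emit an explicit move.

use std::cell::Cell;

struct Renamer {
    // (old, new, used-for-def)
    renames: Vec<(u32, u32, Cell<bool>)>,
}

impl Renamer {
    fn add_rename(&mut self, old: u32, new: u32) {
        self.renames.push((old, new, Cell::new(false)));
    }

    // Mirrors `RegRenamer::get_def`: mapping a def marks the entry as used.
    // `Cell` gives us interior mutability behind the `&self` receiver.
    fn get_def(&self, reg: u32) -> Option<u32> {
        let (_, new, used) = self.renames.iter().find(|(old, _, _)| *old == reg)?;
        used.set(true);
        Some(*new)
    }

    // Renames never applied to a def: the caller must emit `mov new, old`.
    fn unmapped_defs(&self) -> impl Iterator<Item = (u32, u32)> + '_ {
        self.renames
            .iter()
            .filter(|(_, _, used)| !used.get())
            .map(|(old, new, _)| (*old, *new))
    }
}

fn main() {
    let mut r = Renamer { renames: Vec::new() };
    r.add_rename(1, 10); // a def of v1 gets rewritten to v10
    r.add_rename(2, 20); // v2 is passed through untouched (e.g. uextend 64 -> 128)
    assert_eq!(r.get_def(1), Some(10));
    // v2 was never defined by the lowered instructions, so a move is needed:
    assert_eq!(r.unmapped_defs().collect::<Vec<_>>(), vec![(2, 20)]);
}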