AArch64: port load and store operations to ISLE. (#4785)

This retains `lower_amode` in the handwritten code (@akirilov-arm
reports that there is an upcoming patch to port this), but tweaks it
slightly to take a `Value` rather than an `Inst`.
Author: Chris Fallin
Date: 2022-08-29 17:45:55 -07:00
Committed by: GitHub
Parent: 5d05d7676f
Commit: 955d4e4ba1
15 changed files with 442 additions and 296 deletions
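
For context, the tweak described above appears in the diff below as a change to the
external constructor's signature. A rough before/after sketch (bodies elided except
where the hunks show them):

    // Before: the address operand was the memory instruction itself.
    fn amode(&mut self, ty: Type, mem_op: Inst, offset: u32) -> AMode { /* ... */ }

    // After: callers pass the address Value directly, and a new pair_amode
    // constructor covers the load/store-pair (I128) case.
    fn amode(&mut self, ty: Type, addr: Value, offset: u32) -> AMode {
        lower_address(self.lower_ctx, ty, addr, offset as i32)
    }
    fn pair_amode(&mut self, addr: Value, offset: u32) -> PairAMode {
        lower_pair_address(self.lower_ctx, addr, offset as i32)
    }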


@@ -1,6 +1,7 @@
 //! Dynamic IR types
 use crate::ir::entities::DynamicType;
+use crate::ir::types::*;
 use crate::ir::GlobalValue;
 use crate::ir::PrimaryMap;
 use crate::ir::Type;
@@ -36,3 +37,19 @@ impl DynamicTypeData {
 /// All allocated dynamic types.
 pub type DynamicTypes = PrimaryMap<DynamicType, DynamicTypeData>;
+
+/// Convert a dynamic-vector type to a fixed-vector type.
+pub fn dynamic_to_fixed(ty: Type) -> Type {
+    match ty {
+        I8X8XN => I8X8,
+        I8X16XN => I8X16,
+        I16X4XN => I16X4,
+        I16X8XN => I16X8,
+        I32X2XN => I32X2,
+        I32X4XN => I32X4,
+        I64X2XN => I64X2,
+        F32X4XN => F32X4,
+        F64X2XN => F64X2,
+        _ => unreachable!("unhandled type: {}", ty),
+    }
+}


@@ -35,7 +35,7 @@ pub use crate::ir::builder::{
 };
 pub use crate::ir::constant::{ConstantData, ConstantPool};
 pub use crate::ir::dfg::{DataFlowGraph, ValueDef};
-pub use crate::ir::dynamic_type::{DynamicTypeData, DynamicTypes};
+pub use crate::ir::dynamic_type::{dynamic_to_fixed, DynamicTypeData, DynamicTypes};
 pub use crate::ir::entities::{
     Block, Constant, DynamicStackSlot, DynamicType, FuncRef, GlobalValue, Heap, Immediate, Inst,
     JumpTable, SigRef, StackSlot, Table, UserExternalNameRef, Value,


@@ -5,7 +5,7 @@ use crate::ir::types;
 use crate::ir::types::*;
 use crate::ir::MemFlags;
 use crate::ir::Opcode;
-use crate::ir::{ExternalName, LibCall, Signature};
+use crate::ir::{dynamic_to_fixed, ExternalName, LibCall, Signature};
 use crate::isa;
 use crate::isa::aarch64::{inst::EmitState, inst::*, settings as aarch64_settings};
 use crate::isa::unwind::UnwindInst;


@@ -1502,10 +1502,13 @@
 (extern constructor cond_br_cond cond_br_cond)
 
 ;; Lower the address of a load or a store.
-(decl amode (Type Inst u32) AMode)
+(decl amode (Type Value u32) AMode)
 ;; TODO: Port lower_address() to ISLE.
 (extern constructor amode amode)
 
+(decl pair_amode (Value u32) PairAMode)
+(extern constructor pair_amode pair_amode)
+
 ;; Matches an `AMode` that is just a register.
 (decl pure amode_is_reg (AMode) Reg)
 ;; TODO: Implement in ISLE.
@@ -2337,6 +2340,92 @@
 (rule (udf trap_code)
       (SideEffectNoResult.Inst (MInst.Udf trap_code)))
 
+;; Helpers for generating various load instructions, with varying
+;; widths and sign/zero-extending properties.
+(decl aarch64_uload8 (AMode MemFlags) Reg)
+(rule (aarch64_uload8 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.ULoad8 dst amode flags))))
+        dst))
+(decl aarch64_sload8 (AMode MemFlags) Reg)
+(rule (aarch64_sload8 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.SLoad8 dst amode flags))))
+        dst))
+(decl aarch64_uload16 (AMode MemFlags) Reg)
+(rule (aarch64_uload16 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.ULoad16 dst amode flags))))
+        dst))
+(decl aarch64_sload16 (AMode MemFlags) Reg)
+(rule (aarch64_sload16 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.SLoad16 dst amode flags))))
+        dst))
+(decl aarch64_uload32 (AMode MemFlags) Reg)
+(rule (aarch64_uload32 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.ULoad32 dst amode flags))))
+        dst))
+(decl aarch64_sload32 (AMode MemFlags) Reg)
+(rule (aarch64_sload32 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.SLoad32 dst amode flags))))
+        dst))
+(decl aarch64_uload64 (AMode MemFlags) Reg)
+(rule (aarch64_uload64 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.ULoad64 dst amode flags))))
+        dst))
+(decl aarch64_fpuload32 (AMode MemFlags) Reg)
+(rule (aarch64_fpuload32 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuLoad32 dst amode flags))))
+        dst))
+(decl aarch64_fpuload64 (AMode MemFlags) Reg)
+(rule (aarch64_fpuload64 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuLoad64 dst amode flags))))
+        dst))
+(decl aarch64_fpuload128 (AMode MemFlags) Reg)
+(rule (aarch64_fpuload128 amode flags)
+      (let ((dst WritableReg (temp_writable_reg $F64X2))
+            (_ Unit (emit (MInst.FpuLoad128 dst amode flags))))
+        dst))
+(decl aarch64_loadp64 (PairAMode MemFlags) ValueRegs)
+(rule (aarch64_loadp64 amode flags)
+      (let ((dst1 WritableReg (temp_writable_reg $I64))
+            (dst2 WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.LoadP64 dst1 dst2 amode flags))))
+        (value_regs dst1 dst2)))
+
+;; Helpers for generating various store instructions with varying
+;; widths.
+(decl aarch64_store8 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_store8 amode flags val)
+      (SideEffectNoResult.Inst (MInst.Store8 val amode flags)))
+(decl aarch64_store16 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_store16 amode flags val)
+      (SideEffectNoResult.Inst (MInst.Store16 val amode flags)))
+(decl aarch64_store32 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_store32 amode flags val)
+      (SideEffectNoResult.Inst (MInst.Store32 val amode flags)))
+(decl aarch64_store64 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_store64 amode flags val)
+      (SideEffectNoResult.Inst (MInst.Store64 val amode flags)))
+(decl aarch64_fpustore32 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_fpustore32 amode flags val)
+      (SideEffectNoResult.Inst (MInst.FpuStore32 val amode flags)))
+(decl aarch64_fpustore64 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_fpustore64 amode flags val)
+      (SideEffectNoResult.Inst (MInst.FpuStore64 val amode flags)))
+(decl aarch64_fpustore128 (AMode MemFlags Reg) SideEffectNoResult)
+(rule (aarch64_fpustore128 amode flags val)
+      (SideEffectNoResult.Inst (MInst.FpuStore128 val amode flags)))
+(decl aarch64_storep64 (PairAMode MemFlags Reg Reg) SideEffectNoResult)
+(rule (aarch64_storep64 amode flags val1 val2)
+      (SideEffectNoResult.Inst (MInst.StoreP64 val1 val2 amode flags)))
+
 ;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Type of extension performed by an immediate helper


@@ -773,18 +773,3 @@ impl VectorSize {
         }
     }
 }
-
-pub(crate) fn dynamic_to_fixed(ty: Type) -> Type {
-    match ty {
-        I8X8XN => I8X8,
-        I8X16XN => I8X16,
-        I16X4XN => I16X4,
-        I16X8XN => I16X8,
-        I32X2XN => I32X2,
-        I32X4XN => I32X4,
-        I64X2XN => I64X2,
-        F32X4XN => F32X4,
-        F64X2XN => F64X2,
-        _ => unreachable!("unhandled type: {}", ty),
-    }
-}


@@ -1777,10 +1777,10 @@
 (rule (lower (has_type ty (splat (ireduce (iconst (u64_from_imm64 n))))))
       (splat_const n (vector_size ty)))
 
-(rule (lower (has_type ty (splat x @ (load flags _addr offset))))
+(rule (lower (has_type ty (splat x @ (load flags addr offset))))
       (if-let mem_op (is_sinkable_inst x))
       (let ((_ Unit (sink_inst mem_op))
-            (addr AMode (amode (lane_type ty) mem_op offset))
+            (addr AMode (amode (lane_type ty) addr offset))
             (address Reg (load_addr addr)))
         (ld1r address (vector_size ty) flags)))
@@ -2031,6 +2031,174 @@
 (rule (lower (return args))
       (lower_return (range 0 (value_slice_len args)) args))
 
+;;;; Rules for loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower
+       (has_type $I8 (load flags address offset)))
+      (aarch64_uload8 (amode $I8 address offset) flags))
+(rule (lower
+       (has_type $I16 (load flags address offset)))
+      (aarch64_uload16 (amode $I16 address offset) flags))
+(rule (lower
+       (has_type $I32 (load flags address offset)))
+      (aarch64_uload32 (amode $I32 address offset) flags))
+(rule (lower
+       (has_type $I64 (load flags address offset)))
+      (aarch64_uload64 (amode $I64 address offset) flags))
+(rule (lower
+       (has_type $R64 (load flags address offset)))
+      (aarch64_uload64 (amode $I64 address offset) flags))
+(rule (lower
+       (has_type $F32 (load flags address offset)))
+      (aarch64_fpuload32 (amode $F32 address offset) flags))
+(rule (lower
+       (has_type $F64 (load flags address offset)))
+      (aarch64_fpuload64 (amode $F64 address offset) flags))
+(rule (lower
+       (has_type $I128 (load flags address offset)))
+      (aarch64_loadp64 (pair_amode address offset) flags))
+(rule (lower
+       (has_type (ty_vec64 _)
+                 (load flags address offset)))
+      (aarch64_fpuload128 (amode $F64 address offset) flags))
+(rule (lower
+       (has_type (ty_vec128 _)
+                 (load flags address offset)))
+      (aarch64_fpuload128 (amode $I8X16 address offset) flags))
+(rule (lower
+       (has_type (ty_dyn_vec64 _)
+                 (load flags address offset)))
+      (aarch64_fpuload64 (amode $F64 address offset) flags))
+(rule (lower
+       (has_type (ty_dyn_vec128 _)
+                 (load flags address offset)))
+      (aarch64_fpuload128 (amode $I8X16 address offset) flags))
+
+(rule (lower
+       (uload8 flags address offset))
+      (aarch64_uload8 (amode $I8 address offset) flags))
+(rule (lower
+       (sload8 flags address offset))
+      (aarch64_sload8 (amode $I8 address offset) flags))
+(rule (lower
+       (uload16 flags address offset))
+      (aarch64_uload16 (amode $I16 address offset) flags))
+(rule (lower
+       (sload16 flags address offset))
+      (aarch64_sload16 (amode $I16 address offset) flags))
+(rule (lower
+       (uload32 flags address offset))
+      (aarch64_uload32 (amode $I32 address offset) flags))
+(rule (lower
+       (sload32 flags address offset))
+      (aarch64_sload32 (amode $I32 address offset) flags))
+
+(rule (lower
+       (sload8x8 flags address offset))
+      (vec_extend (VecExtendOp.Sxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size16)))
+(rule (lower
+       (uload8x8 flags address offset))
+      (vec_extend (VecExtendOp.Uxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size16)))
+(rule (lower
+       (sload16x4 flags address offset))
+      (vec_extend (VecExtendOp.Sxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size32)))
+(rule (lower
+       (uload16x4 flags address offset))
+      (vec_extend (VecExtendOp.Uxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size32)))
+(rule (lower
+       (sload32x2 flags address offset))
+      (vec_extend (VecExtendOp.Sxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size64)))
+(rule (lower
+       (uload32x2 flags address offset))
+      (vec_extend (VecExtendOp.Uxtl)
+                  (aarch64_fpuload64 (amode $F64 address offset) flags)
+                  $false
+                  (ScalarSize.Size64)))
+
+;;;; Rules for stores ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower
+       (store flags value @ (value_type $I8) address offset))
+      (side_effect
+       (aarch64_store8 (amode $I8 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $I16) address offset))
+      (side_effect
+       (aarch64_store16 (amode $I16 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $I32) address offset))
+      (side_effect
+       (aarch64_store32 (amode $I32 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $I64) address offset))
+      (side_effect
+       (aarch64_store64 (amode $I64 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $R64) address offset))
+      (side_effect
+       (aarch64_store64 (amode $I64 address offset) flags value)))
+(rule (lower
+       (istore8 flags value address offset))
+      (side_effect
+       (aarch64_store8 (amode $I8 address offset) flags value)))
+(rule (lower
+       (istore16 flags value address offset))
+      (side_effect
+       (aarch64_store16 (amode $I16 address offset) flags value)))
+(rule (lower
+       (istore32 flags value address offset))
+      (side_effect
+       (aarch64_store32 (amode $I32 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $F32) address offset))
+      (side_effect
+       (aarch64_fpustore32 (amode $F32 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $F64) address offset))
+      (side_effect
+       (aarch64_fpustore64 (amode $F64 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type $I128) address offset))
+      (side_effect
+       (aarch64_storep64 (pair_amode address offset) flags
+                         (value_regs_get value 0)
+                         (value_regs_get value 1))))
+(rule (lower
+       (store flags value @ (value_type (ty_vec64 _)) address offset))
+      (side_effect
+       (aarch64_fpustore64 (amode $F64 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type (ty_vec128 _)) address offset))
+      (side_effect
+       (aarch64_fpustore128 (amode $I8X16 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type (ty_dyn_vec64 _)) address offset))
+      (side_effect
+       (aarch64_fpustore64 (amode $F64 address offset) flags value)))
+(rule (lower
+       (store flags value @ (value_type (ty_dyn_vec128 _)) address offset))
+      (side_effect
+       (aarch64_fpustore128 (amode $I8X16 address offset) flags value)))
+
 ;;; Rules for `{get,set}_pinned_reg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (get_pinned_reg))


@@ -18,7 +18,7 @@ use crate::machinst::lower::*;
 use crate::machinst::{Reg, Writable};
 use crate::{machinst::*, trace};
 use crate::{CodegenError, CodegenResult};
-use smallvec::SmallVec;
+use smallvec::{smallvec, SmallVec};
 use std::cmp;
 
 pub mod isle;
@@ -507,19 +507,19 @@ type AddressAddend64List = SmallVec<[Reg; 4]>;
 /// then possibly support extensions at these leaves.
 fn collect_address_addends(
     ctx: &mut Lower<Inst>,
-    roots: &[InsnInput],
+    root: Value,
 ) -> (AddressAddend64List, AddressAddend32List, i64) {
     let mut result32: AddressAddend32List = SmallVec::new();
     let mut result64: AddressAddend64List = SmallVec::new();
     let mut offset: i64 = 0;
 
-    let mut workqueue: SmallVec<[InsnInput; 4]> = roots.iter().cloned().collect();
+    let mut workqueue: SmallVec<[Value; 4]> = smallvec![root];
 
-    while let Some(input) = workqueue.pop() {
-        debug_assert!(ty_bits(ctx.input_ty(input.insn, input.input)) == 64);
-        if let Some((op, insn)) = maybe_input_insn_multi(
+    while let Some(value) = workqueue.pop() {
+        debug_assert_eq!(ty_bits(ctx.value_ty(value)), 64);
+        if let Some((op, insn)) = maybe_value_multi(
             ctx,
-            input,
+            value,
             &[
                 Opcode::Uextend,
                 Opcode::Sextend,
@@ -551,12 +551,12 @@ fn collect_address_addends(
                     }
                 }
                 Opcode::Uextend | Opcode::Sextend => {
-                    let reg = put_input_in_reg(ctx, input, NarrowValueMode::None);
+                    let reg = put_value_in_reg(ctx, value, NarrowValueMode::None);
                     result64.push(reg);
                 }
                 Opcode::Iadd => {
                     for input in 0..ctx.num_inputs(insn) {
-                        let addend = InsnInput { insn, input };
+                        let addend = ctx.input_as_value(insn, input);
                         workqueue.push(addend);
                     }
                 }
@@ -567,7 +567,7 @@ fn collect_address_addends(
                _ => panic!("Unexpected opcode from maybe_input_insn_multi"),
            }
        } else {
-            let reg = put_input_in_reg(ctx, input, NarrowValueMode::ZeroExtend64);
+            let reg = put_value_in_reg(ctx, value, NarrowValueMode::ZeroExtend64);
            result64.push(reg);
        }
    }
@@ -576,15 +576,11 @@ fn collect_address_addends(
 }
 
 /// Lower the address of a pair load or store.
-pub(crate) fn lower_pair_address(
-    ctx: &mut Lower<Inst>,
-    roots: &[InsnInput],
-    offset: i32,
-) -> PairAMode {
+pub(crate) fn lower_pair_address(ctx: &mut Lower<Inst>, addr: Value, offset: i32) -> PairAMode {
     // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
     // extends and addition ops. We update these as we consume address
     // components, so they represent the remaining addends not yet handled.
-    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
+    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, addr);
     let offset = args_offset + (offset as i64);
     trace!(
@@ -636,7 +632,7 @@ pub(crate) fn lower_pair_address(
 pub(crate) fn lower_address(
     ctx: &mut Lower<Inst>,
     elem_ty: Type,
-    roots: &[InsnInput],
+    addr: Value,
     offset: i32,
 ) -> AMode {
     // TODO: support base_reg + scale * index_reg. For this, we would need to
@@ -645,7 +641,7 @@ pub(crate) fn lower_address(
     // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
     // extends and addition ops. We update these as we consume address
     // components, so they represent the remaining addends not yet handled.
-    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
+    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, addr);
     let mut offset = args_offset + (offset as i64);
     trace!(
@@ -1088,14 +1084,26 @@ pub(crate) fn maybe_input_insn(
     None
 }
 
-/// Checks for an instance of any one of `ops` feeding the given input.
-pub(crate) fn maybe_input_insn_multi(
+/// Checks for an instance of `op` defining the given value.
+pub(crate) fn maybe_value(c: &mut Lower<Inst>, value: Value, op: Opcode) -> Option<IRInst> {
+    let inputs = c.get_value_as_source_or_const(value);
+    if let Some((src_inst, _)) = inputs.inst.as_inst() {
+        let data = c.data(src_inst);
+        if data.opcode() == op {
+            return Some(src_inst);
+        }
+    }
+    None
+}
+
+/// Checks for an instance of any one of `ops` defining the given value.
+pub(crate) fn maybe_value_multi(
     c: &mut Lower<Inst>,
-    input: InsnInput,
+    value: Value,
     ops: &[Opcode],
 ) -> Option<(Opcode, IRInst)> {
     for &op in ops {
-        if let Some(inst) = maybe_input_insn(c, input, op) {
+        if let Some(inst) = maybe_value(c, value, op) {
             return Some((op, inst));
         }
     }
@@ -1452,41 +1460,6 @@ pub(crate) fn materialize_bool_result(
     }
 }
 
-fn load_op_to_ty(op: Opcode) -> Option<Type> {
-    match op {
-        Opcode::Sload8 | Opcode::Uload8 => Some(I8),
-        Opcode::Sload16 | Opcode::Uload16 => Some(I16),
-        Opcode::Sload32 | Opcode::Uload32 => Some(I32),
-        Opcode::Load => None,
-        Opcode::Sload8x8 | Opcode::Uload8x8 => Some(I8X8),
-        Opcode::Sload16x4 | Opcode::Uload16x4 => Some(I16X4),
-        Opcode::Sload32x2 | Opcode::Uload32x2 => Some(I32X2),
-        _ => None,
-    }
-}
-
-/// Helper to lower a load instruction; this is used in several places, because
-/// a load can sometimes be merged into another operation.
-pub(crate) fn lower_load<
-    F: FnMut(&mut Lower<Inst>, ValueRegs<Writable<Reg>>, Type, AMode) -> CodegenResult<()>,
->(
-    ctx: &mut Lower<Inst>,
-    ir_inst: IRInst,
-    inputs: &[InsnInput],
-    output: InsnOutput,
-    mut f: F,
-) -> CodegenResult<()> {
-    let op = ctx.data(ir_inst).opcode();
-
-    let elem_ty = load_op_to_ty(op).unwrap_or_else(|| ctx.output_ty(ir_inst, 0));
-    let off = ctx.data(ir_inst).load_store_offset().unwrap();
-    let mem = lower_address(ctx, elem_ty, &inputs[..], off);
-    let rd = get_output_reg(ctx, output);
-    f(ctx, rd, elem_ty, mem)
-}
-
 //=============================================================================
 // Lowering-backend trait implementation.


@@ -6,14 +6,14 @@ use generated_code::Context;
 // Types that the generated ISLE code uses via `use super::*`.
 use super::{
-    insn_inputs, lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg,
-    zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond,
-    CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC,
-    JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize,
-    PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
+    lower_constant_f128, lower_constant_f32, lower_constant_f64, writable_zero_reg, zero_reg,
+    AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind,
+    ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo,
+    MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, PairAMode, Reg,
+    ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize, NZCV,
 };
 use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
-use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
+use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
 use crate::isa::aarch64::settings::Flags as IsaFlags;
 use crate::machinst::valueregs;
 use crate::machinst::{isle::*, InputSourceInst};
@@ -484,13 +484,12 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         }
     }
 
-    fn amode(&mut self, ty: Type, mem_op: Inst, offset: u32) -> AMode {
-        lower_address(
-            self.lower_ctx,
-            ty,
-            &insn_inputs(self.lower_ctx, mem_op)[..],
-            offset as i32,
-        )
+    fn amode(&mut self, ty: Type, addr: Value, offset: u32) -> AMode {
+        lower_address(self.lower_ctx, ty, addr, offset as i32)
+    }
+
+    fn pair_amode(&mut self, addr: Value, offset: u32) -> PairAMode {
+        lower_pair_address(self.lower_ctx, addr, offset as i32)
     }
 
     fn amode_is_reg(&mut self, address: &AMode) -> Option<Reg> {


@@ -101,133 +101,10 @@ pub(crate) fn lower_insn_to_regs(
         | Opcode::Sload16x4
         | Opcode::Uload16x4
        | Opcode::Sload32x2
-        | Opcode::Uload32x2 => {
-            let sign_extend = match op {
-                Opcode::Sload8 | Opcode::Sload16 | Opcode::Sload32 => true,
-                _ => false,
-            };
-            let flags = ctx
-                .memflags(insn)
-                .expect("Load instruction should have memflags");
-
-            let out_ty = ctx.output_ty(insn, 0);
-            if out_ty == I128 {
-                let off = ctx.data(insn).load_store_offset().unwrap();
-                let mem = lower_pair_address(ctx, &inputs[..], off);
-                let dst = get_output_reg(ctx, outputs[0]);
-                ctx.emit(Inst::LoadP64 {
-                    rt: dst.regs()[0],
-                    rt2: dst.regs()[1],
-                    mem,
-                    flags,
-                });
-            } else {
-                lower_load(
-                    ctx,
-                    insn,
-                    &inputs[..],
-                    outputs[0],
-                    |ctx, dst, mut elem_ty, mem| {
-                        if elem_ty.is_dynamic_vector() {
-                            elem_ty = dynamic_to_fixed(elem_ty);
-                        }
-                        let rd = dst.only_reg().unwrap();
-                        let is_float = ty_has_float_or_vec_representation(elem_ty);
-                        ctx.emit(match (ty_bits(elem_ty), sign_extend, is_float) {
-                            (1, _, _) => Inst::ULoad8 { rd, mem, flags },
-                            (8, false, _) => Inst::ULoad8 { rd, mem, flags },
-                            (8, true, _) => Inst::SLoad8 { rd, mem, flags },
-                            (16, false, _) => Inst::ULoad16 { rd, mem, flags },
-                            (16, true, _) => Inst::SLoad16 { rd, mem, flags },
-                            (32, false, false) => Inst::ULoad32 { rd, mem, flags },
-                            (32, true, false) => Inst::SLoad32 { rd, mem, flags },
-                            (32, _, true) => Inst::FpuLoad32 { rd, mem, flags },
-                            (64, _, false) => Inst::ULoad64 { rd, mem, flags },
-                            // Note that we treat some of the vector loads as scalar floating-point loads,
-                            // which is correct in a little endian environment.
-                            (64, _, true) => Inst::FpuLoad64 { rd, mem, flags },
-                            (128, _, true) => Inst::FpuLoad128 { rd, mem, flags },
-                            _ => {
-                                return Err(CodegenError::Unsupported(format!(
-                                    "Unsupported type in load: {:?}",
-                                    elem_ty
-                                )))
-                            }
-                        });
-
-                        let vec_extend = match op {
-                            Opcode::Sload8x8 => Some((VecExtendOp::Sxtl, ScalarSize::Size16)),
-                            Opcode::Uload8x8 => Some((VecExtendOp::Uxtl, ScalarSize::Size16)),
-                            Opcode::Sload16x4 => Some((VecExtendOp::Sxtl, ScalarSize::Size32)),
-                            Opcode::Uload16x4 => Some((VecExtendOp::Uxtl, ScalarSize::Size32)),
-                            Opcode::Sload32x2 => Some((VecExtendOp::Sxtl, ScalarSize::Size64)),
-                            Opcode::Uload32x2 => Some((VecExtendOp::Uxtl, ScalarSize::Size64)),
-                            _ => None,
-                        };
-
-                        if let Some((t, lane_size)) = vec_extend {
-                            let rd = dst.only_reg().unwrap();
-                            ctx.emit(Inst::VecExtend {
-                                t,
-                                rd,
-                                rn: rd.to_reg(),
-                                high_half: false,
-                                lane_size,
-                            });
-                        }
-
-                        Ok(())
-                    },
-                )?;
-            }
-        }
+        | Opcode::Uload32x2 => implemented_in_isle(ctx),
 
         Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
-            let off = ctx.data(insn).load_store_offset().unwrap();
-            let mut elem_ty = match op {
-                Opcode::Istore8 => I8,
-                Opcode::Istore16 => I16,
-                Opcode::Istore32 => I32,
-                Opcode::Store => ctx.input_ty(insn, 0),
-                _ => unreachable!(),
-            };
-            let is_float = ty_has_float_or_vec_representation(elem_ty);
-            let flags = ctx
-                .memflags(insn)
-                .expect("Store instruction should have memflags");
-
-            let dst = put_input_in_regs(ctx, inputs[0]);
-
-            if elem_ty == I128 {
-                let mem = lower_pair_address(ctx, &inputs[1..], off);
-                ctx.emit(Inst::StoreP64 {
-                    rt: dst.regs()[0],
-                    rt2: dst.regs()[1],
-                    mem,
-                    flags,
-                });
-            } else {
-                if elem_ty.is_dynamic_vector() {
-                    elem_ty = dynamic_to_fixed(elem_ty);
-                }
-                let rd = dst.only_reg().unwrap();
-                let mem = lower_address(ctx, elem_ty, &inputs[1..], off);
-                ctx.emit(match (ty_bits(elem_ty), is_float) {
-                    (1, _) | (8, _) => Inst::Store8 { rd, mem, flags },
-                    (16, _) => Inst::Store16 { rd, mem, flags },
-                    (32, false) => Inst::Store32 { rd, mem, flags },
-                    (32, true) => Inst::FpuStore32 { rd, mem, flags },
-                    (64, false) => Inst::Store64 { rd, mem, flags },
-                    (64, true) => Inst::FpuStore64 { rd, mem, flags },
-                    (128, _) => Inst::FpuStore128 { rd, mem, flags },
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "Unsupported type in store: {:?}",
-                            elem_ty
-                        )))
-                    }
-                });
-            }
+            implemented_in_isle(ctx)
         }
 
         Opcode::StackAddr => implemented_in_isle(ctx),


@@ -9,8 +9,8 @@ use target_lexicon::Triple;
 pub use super::MachLabel;
 pub use crate::data_value::DataValue;
 pub use crate::ir::{
-    ArgumentExtension, Constant, DynamicStackSlot, ExternalName, FuncRef, GlobalValue, Immediate,
-    SigRef, StackSlot,
+    dynamic_to_fixed, ArgumentExtension, Constant, DynamicStackSlot, ExternalName, FuncRef,
+    GlobalValue, Immediate, SigRef, StackSlot,
 };
 pub use crate::isa::unwind::UnwindInst;
 pub use crate::machinst::{
@@ -397,6 +397,15 @@ macro_rules! isle_prelude_methods {
             }
         }
 
+        #[inline]
+        fn ty_vec64_ctor(&mut self, ty: Type) -> Option<Type> {
+            if ty.is_vector() && ty.bits() == 64 {
+                Some(ty)
+            } else {
+                None
+            }
+        }
+
         #[inline]
         fn ty_vec64(&mut self, ty: Type) -> Option<Type> {
             if ty.is_vector() && ty.bits() == 64 {
@@ -415,6 +424,24 @@ macro_rules! isle_prelude_methods {
             }
         }
 
+        #[inline]
+        fn ty_dyn_vec64(&mut self, ty: Type) -> Option<Type> {
+            if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 64 {
+                Some(ty)
+            } else {
+                None
+            }
+        }
+
+        #[inline]
+        fn ty_dyn_vec128(&mut self, ty: Type) -> Option<Type> {
+            if ty.is_dynamic_vector() && dynamic_to_fixed(ty).bits() == 128 {
+                Some(ty)
+            } else {
+                None
+            }
+        }
+
         #[inline]
         fn ty_vec64_int(&mut self, ty: Type) -> Option<Type> {
             if ty.is_vector() && ty.bits() == 64 && ty.lane_type().is_int() {


@@ -381,14 +381,25 @@
 (decl ty_float_or_vec (Type) Type)
 (extern extractor ty_float_or_vec ty_float_or_vec)
 
-;; A pure constructor that only matches 64-bit vector types.
+;; A pure constructor/extractor that only matches 64-bit vector types.
 (decl pure ty_vec64 (Type) Type)
-(extern constructor ty_vec64 ty_vec64)
+(extern constructor ty_vec64 ty_vec64_ctor)
+(extern extractor ty_vec64 ty_vec64)
 
 ;; An extractor that only matches 128-bit vector types.
 (decl ty_vec128 (Type) Type)
 (extern extractor ty_vec128 ty_vec128)
 
+;; An extractor that only matches dynamic vector types with a 64-bit
+;; base type.
+(decl ty_dyn_vec64 (Type) Type)
+(extern extractor ty_dyn_vec64 ty_dyn_vec64)
+
+;; An extractor that only matches dynamic vector types with a 128-bit
+;; base type.
+(decl ty_dyn_vec128 (Type) Type)
+(extern extractor ty_dyn_vec128 ty_dyn_vec128)
+
 ;; An extractor that only matches 64-bit vector types with integer
 ;; lanes (I8X8, I16X4, I32X2)
 (decl ty_vec64_int (Type) Type)


@@ -36,8 +36,8 @@ block0(v0: i32, v1: i32):
 }
 
 ; block0:
-;   mov w6, w0
-;   ldr w0, [x6, w1, UXTW]
+;   mov w5, w0
+;   ldr w0, [x5, w1, UXTW]
 ;   ret
 
 function %f8(i64, i32) -> i32 {
@@ -52,10 +52,10 @@ block0(v0: i64, v1: i32):
 }
 
 ; block0:
-;   add x6, x0, #68
-;   add x6, x6, x0
-;   add x6, x6, x1, SXTW
-;   ldr w0, [x6, w1, SXTW]
+;   add x5, x0, #68
+;   add x5, x5, x0
+;   add x5, x5, x1, SXTW
+;   ldr w0, [x5, w1, SXTW]
 ;   ret
 
 function %f9(i64, i64, i64) -> i32 {
@@ -85,10 +85,10 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-;   movz x8, #4100
-;   add x8, x8, x1
-;   add x8, x8, x2
-;   ldr w0, [x8, x0]
+;   movz x7, #4100
+;   add x7, x7, x1
+;   add x7, x7, x2
+;   ldr w0, [x7, x0]
 ;   ret
 
 function %f10() -> i32 {
@@ -99,8 +99,8 @@ block0:
 }
 
 ; block0:
-;   movz x2, #1234
-;   ldr w0, [x2]
+;   movz x1, #1234
+;   ldr w0, [x1]
 ;   ret
 
 function %f11(i64) -> i32 {
@@ -112,8 +112,8 @@ block0(v0: i64):
 }
 
 ; block0:
-;   add x4, x0, #8388608
-;   ldr w0, [x4]
+;   add x3, x0, #8388608
+;   ldr w0, [x3]
 ;   ret
 
 function %f12(i64) -> i32 {
@@ -125,8 +125,8 @@ block0(v0: i64):
 }
 
 ; block0:
-;   sub x4, x0, #4
-;   ldr w0, [x4]
+;   sub x3, x0, #4
+;   ldr w0, [x3]
 ;   ret
 
 function %f13(i64) -> i32 {
@@ -138,10 +138,10 @@ block0(v0: i64):
 }
 
 ; block0:
-;   movz w4, #51712
-;   movk w4, #15258, LSL #16
-;   add x4, x4, x0
-;   ldr w0, [x4]
+;   movz w3, #51712
+;   movk w3, #15258, LSL #16
+;   add x3, x3, x0
+;   ldr w0, [x3]
 ;   ret
 
 function %f14(i32) -> i32 {
@@ -152,8 +152,8 @@ block0(v0: i32):
 }
 
 ; block0:
-;   sxtw x4, w0
-;   ldr w0, [x4]
+;   sxtw x3, w0
+;   ldr w0, [x3]
 ;   ret
 
 function %f15(i32, i32) -> i32 {
@@ -166,8 +166,8 @@ block0(v0: i32, v1: i32):
 }
 
 ; block0:
-;   sxtw x6, w0
-;   ldr w0, [x6, w1, SXTW]
+;   sxtw x5, w0
+;   ldr w0, [x5, w1, SXTW]
 ;   ret
 
 function %f18(i64, i64, i64) -> i32 {
@@ -179,8 +179,8 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-;   movn w8, #4097
-;   ldrsh x0, [x8]
+;   movn w7, #4097
+;   ldrsh x0, [x7]
 ;   ret
 
 function %f19(i64, i64, i64) -> i32 {
@@ -192,8 +192,8 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-;   movz x8, #4098
-;   ldrsh x0, [x8]
+;   movz x7, #4098
+;   ldrsh x0, [x7]
 ;   ret
 
 function %f20(i64, i64, i64) -> i32 {
@@ -205,9 +205,9 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-;   movn w8, #4097
-;   sxtw x10, w8
-;   ldrsh x0, [x10]
+;   movn w7, #4097
+;   sxtw x9, w7
+;   ldrsh x0, [x9]
 ;   ret
 
 function %f21(i64, i64, i64) -> i32 {
@@ -219,9 +219,9 @@ block0(v0: i64, v1: i64, v2: i64):
 }
 
 ; block0:
-;   movz x8, #4098
-;   sxtw x10, w8
-;   ldrsh x0, [x10]
+;   movz x7, #4098
+;   sxtw x9, w7
+;   ldrsh x0, [x9]
 ;   ret
 
 function %i128(i64) -> i128 {
@@ -232,11 +232,11 @@ block0(v0: i64):
 }
 
 ; block0:
-;   mov x8, x0
-;   ldp x3, x1, [x8]
-;   mov x11, x3
+;   mov x6, x0
+;   ldp x7, x1, [x6]
+;   mov x11, x7
 ;   stp x11, x1, [x0]
-;   mov x0, x3
+;   mov x0, x7
 ;   ret
 
 function %i128_imm_offset(i64) -> i128 {
@@ -247,11 +247,11 @@ block0(v0: i64):
 }
 
 ; block0:
-;   mov x8, x0
-;   ldp x3, x1, [x8, #16]
-;   mov x11, x3
+;   mov x6, x0
+;   ldp x7, x1, [x6, #16]
+;   mov x11, x7
 ;   stp x11, x1, [x0, #16]
-;   mov x0, x3
+;   mov x0, x7
 ;   ret
 
 function %i128_imm_offset_large(i64) -> i128 {
@@ -262,11 +262,11 @@ block0(v0: i64):
 }
 
 ; block0:
-;   mov x8, x0
-;   ldp x3, x1, [x8, #504]
-;   mov x11, x3
+;   mov x6, x0
+;   ldp x7, x1, [x6, #504]
+;   mov x11, x7
 ;   stp x11, x1, [x0, #504]
-;   mov x0, x3
+;   mov x0, x7
 ;   ret
 
 function %i128_imm_offset_negative_large(i64) -> i128 {
@@ -277,11 +277,11 @@ block0(v0: i64):
 }
 
 ; block0:
-;   mov x8, x0
-;   ldp x3, x1, [x8, #-512]
-;   mov x11, x3
+;   mov x6, x0
+;   ldp x7, x1, [x6, #-512]
+;   mov x11, x7
 ;   stp x11, x1, [x0, #-512]
-;   mov x0, x3
+;   mov x0, x7
 ;   ret
 
 function %i128_add_offset(i64) -> i128 {
@@ -293,11 +293,11 @@ block0(v0: i64):
 }
 
 ; block0:
-;   mov x8, x0
-;   ldp x3, x1, [x8, #32]
-;   mov x11, x3
+;   mov x6, x0
+;   ldp x7, x1, [x6, #32]
+;   mov x11, x7
 ;   stp x11, x1, [x0, #32]
-;   mov x0, x3
+;   mov x0, x7
 ;   ret
 
 function %i128_32bit_sextend_simple(i32) -> i128 {
@@ -309,11 +309,11 @@ block0(v0: i32):
 }
 
 ; block0:
-;   sxtw x8, w0
-;   ldp x4, x1, [x8]
-;   sxtw x9, w0
-;   mov x0, x4
-;   stp x0, x1, [x9]
+;   sxtw x6, w0
+;   ldp x10, x1, [x6]
+;   sxtw x7, w0
+;   mov x0, x10
+;   stp x0, x1, [x7]
 ;   ret
 
 function %i128_32bit_sextend(i64, i32) -> i128 {
@@ -327,13 +327,13 @@ block0(v0: i64, v1: i32):
 }
 
 ; block0:
-;   mov x10, x0
-;   add x10, x10, x1, SXTW
-;   ldp x6, x7, [x10, #24]
+;   mov x8, x0
+;   add x8, x8, x1, SXTW
+;   ldp x10, x11, [x8, #24]
 ;   add x0, x0, x1, SXTW
-;   mov x15, x6
-;   mov x1, x7
+;   mov x15, x10
+;   mov x1, x11
 ;   stp x15, x1, [x0, #24]
-;   mov x0, x6
+;   mov x0, x10
 ;   ret


@@ -15,15 +15,15 @@ block0(v0: i64, v1: i32):
 ; block0:
 ;   mov w10, w1
-;   ldr x5, [x0]
-;   mov x11, x5
+;   ldr x11, [x0]
+;   mov x11, x11
 ;   subs xzr, x10, x11
 ;   b.ls label1 ; b label2
 ; block1:
-;   add x13, x0, x1, UXTW
+;   add x12, x0, x1, UXTW
 ;   subs xzr, x10, x11
-;   movz x14, #0
-;   csel x0, x14, x13, hi
+;   movz x13, #0
+;   csel x0, x13, x12, hi
 ;   csdb
 ;   ret
 ; block2:


@@ -98,8 +98,8 @@ block0(v0: i64):
 }
 
 ; block0:
-;   ldr w2, [x0]
-;   fmov s0, w2
+;   ldr w4, [x0]
+;   fmov s0, w4
 ;   ret
 
 function %load32_zero_int(i32) -> i32x4 {


@@ -86,9 +86,9 @@ block0(v0: i64, v1: i64):
 }
 
 ; block0:
-;   ldrb w4, [x0]
+;   ldrb w8, [x0]
 ;   ld1r { v0.16b }, [x1]
-;   dup v1.16b, w4
+;   dup v1.16b, w8
 ;   ret
 
 function %f8(i64, i64) -> i8x16, i8x16 {
@@ -100,9 +100,9 @@ block0(v0: i64, v1: i64):
 }
 
 ; block0:
-;   ldrb w4, [x0]
-;   dup v0.16b, w4
-;   dup v1.16b, w4
+;   ldrb w8, [x0]
+;   dup v0.16b, w8
+;   dup v1.16b, w8
 ;   ret
 
 function %f9() -> i32x2 {