Cranelift AArch64: Migrate Splat to ISLE (#4521)

Copyright (c) 2022, Arm Limited.
Author: Anton Kirilov
Date: 2022-07-26 18:57:15 +01:00 (committed by GitHub)
Parent: 1321c234e5
Commit: ead6edb0c5
21 changed files with 593 additions and 338 deletions


@@ -171,8 +171,8 @@ impl Type {
         self.replace_lanes(match self.lane_type() {
             I8 | B1 | B8 => I8,
             I16 | B16 => I16,
-            I32 | B32 => I32,
-            I64 | B64 => I64,
+            I32 | B32 | F32 => I32,
+            I64 | B64 | F64 => I64,
             I128 | B128 => I128,
             _ => unimplemented!(),
         })
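
This hunk widens a lane-conversion helper on `Type`: float lanes now fold into the integer lane of the same width, which is what lets the new splat rules below treat `f32`/`f64` splat constants as raw same-width integer bit patterns. A self-contained toy model of the mapping (simplified stand-in enum, not cranelift's actual `Type`):

    // Toy model of the lane mapping above; `Lane` stands in for the lane
    // component of cranelift's `Type`.
    #[derive(Debug, PartialEq, Clone, Copy)]
    enum Lane {
        I8, I16, I32, I64, I128,
        B1, B8, B16, B32, B64, B128,
        F32, F64,
    }

    fn int_lane(lane: Lane) -> Lane {
        use Lane::*;
        match lane {
            I8 | B1 | B8 => I8,
            I16 | B16 => I16,
            I32 | B32 | F32 => I32, // new: F32 folds to I32
            I64 | B64 | F64 => I64, // new: F64 folds to I64
            I128 | B128 => I128,
        }
    }

    fn main() {
        assert_eq!(int_lane(Lane::F32), Lane::I32);
        assert_eq!(int_lane(Lane::F64), Lane::I64);
    }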


@@ -627,7 +627,8 @@
 (VecLoadReplicate
   (rd WritableReg)
   (rn Reg)
-  (size VectorSize))
+  (size VectorSize)
+  (flags MemFlags))

 ;; Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn
 ;; control-flow diamond.
@@ -1376,6 +1377,16 @@
 (decl cond_br_cond (Cond) CondBrKind)
 (extern constructor cond_br_cond cond_br_cond)

+;; Lower the address of a load or a store.
+(decl amode (Type Inst u32) AMode)
+;; TODO: Port lower_address() to ISLE.
+(extern constructor amode amode)
+
+;; Matches an `AMode` that is just a register.
+(decl pure amode_is_reg (AMode) Reg)
+;; TODO: Implement in ISLE.
+(extern constructor amode_is_reg amode_is_reg)
+
 ;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Helper for creating the zero register.
@@ -1481,6 +1492,13 @@
         (_ Unit (emit (MInst.VecDup dst src size))))
       dst))

+;; Helper for emitting `MInst.VecDupFromFpu` instructions.
+(decl vec_dup_from_fpu (Reg VectorSize) Reg)
+(rule (vec_dup_from_fpu src size)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecDupFromFpu dst src size))))
+        dst))
+
 ;; Helper for emitting `MInst.AluRRImm12` instructions.
 (decl alu_rr_imm12 (ALUOp Type Reg Imm12) Reg)
 (rule (alu_rr_imm12 op ty src imm)
@@ -2167,7 +2185,7 @@
 (decl sinkable_atomic_load (SinkableAtomicLoad) Value)
 (extern extractor sinkable_atomic_load sinkable_atomic_load)

-;; Sink a `SinkableLoad` into a `Reg`.
+;; Sink a `SinkableAtomicLoad` into a `Reg`.
 ;;
 ;; This is a side-effectful operation that notifies the context that the
 ;; instruction that produced the `SinkableAtomicLoad` has been sunk into another
@@ -2230,6 +2248,29 @@
       (alu_rrr op ty x_lo y_lo)
       (alu_rrr op ty x_hi y_hi))))

+;; Helper for emitting `MInst.VecLoadReplicate` instructions.
+(decl ld1r (Reg VectorSize MemFlags) Reg)
+(rule (ld1r src size flags)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.VecLoadReplicate dst src size flags))))
+        dst))
+
+;; Helper for emitting `MInst.LoadAddr` instructions.
+(decl load_addr (AMode) Reg)
+(rule (load_addr addr)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.LoadAddr dst addr))))
+        dst))
+(rule (load_addr addr)
+      (if-let addr_reg (amode_is_reg addr))
+      addr_reg)
+
+;; Lower a vector splat with a constant parameter.
+(decl splat_const (u64 VectorSize) Reg)
+;; TODO: Port lower_splat_const() to ISLE.
+(extern constructor splat_const splat_const)
+
 ;; Generate comparison to zero operator from input condition code
 (decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
 (extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)


@@ -2258,10 +2258,10 @@ impl MachInstEmit for Inst {
                     ScalarSize::Size16 => 0b00010,
                     ScalarSize::Size32 => 0b00100,
                     ScalarSize::Size64 => 0b01000,
-                    _ => unimplemented!("Unexpected VectorSize: {:?}", size),
+                    _ => unreachable!(),
                 };
                 sink.put4(
-                    0b000_01110000_00000_000011_00000_00000
+                    0b0_0_0_01110000_00000_000011_00000_00000
                         | (q << 30)
                         | (imm5 << 16)
                         | (machreg_to_gpr(rn) << 5)
@@ -2625,13 +2625,18 @@ impl MachInstEmit for Inst {
                 };
                 sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd));
             }
-            &Inst::VecLoadReplicate { rd, rn, size } => {
+            &Inst::VecLoadReplicate {
+                rd,
+                rn,
+                size,
+                flags,
+            } => {
                 let rd = allocs.next_writable(rd);
                 let rn = allocs.next(rn);
                 let (q, size) = size.enc_size();

                 let srcloc = state.cur_srcloc();
-                if srcloc != SourceLoc::default() {
+                if srcloc != SourceLoc::default() && !flags.notrap() {
                     // Register the offset at which the actual load instruction starts.
                     sink.add_trap(TrapCode::HeapOutOfBounds);
                 }
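
With `MemFlags` threaded through `VecLoadReplicate`, emission only records a trap site for loads that may actually trap. A minimal sketch of that gating, with a toy `MemFlags` (the real type lives in cranelift's IR and carries more flags than shown here):

    #[derive(Clone, Copy)]
    struct MemFlags {
        bits: u8,
    }

    impl MemFlags {
        const NOTRAP: u8 = 1;

        // Toy equivalent of `MemFlags::trusted()`: the access is known not to trap.
        fn trusted() -> Self {
            MemFlags { bits: Self::NOTRAP }
        }

        fn notrap(self) -> bool {
            self.bits & Self::NOTRAP != 0
        }
    }

    // Mirrors the emission check above: a `HeapOutOfBounds` trap record is
    // needed only when there is a source location and the load may trap.
    fn needs_trap_record(has_srcloc: bool, flags: MemFlags) -> bool {
        has_srcloc && !flags.notrap()
    }

    fn main() {
        assert!(needs_trap_record(true, MemFlags { bits: 0 }));
        assert!(!needs_trap_record(true, MemFlags::trusted()));
    }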


@@ -2351,10 +2351,10 @@ fn test_aarch64_binemit() {
                 Inst::VecDup {
                     rd: writable_vreg(25),
                     rn: xreg(7),
-                    size: VectorSize::Size8x16,
+                    size: VectorSize::Size8x8,
                 },
-                "F90C014E",
-                "dup v25.16b, w7",
+                "F90C010E",
+                "dup v25.8b, w7",
             ));
             insns.push((
                 Inst::VecDup {
@@ -2387,10 +2387,10 @@ fn test_aarch64_binemit() {
                 Inst::VecDup {
                     rd: writable_vreg(0),
                     rn: xreg(28),
-                    size: VectorSize::Size32x4,
+                    size: VectorSize::Size32x2,
                 },
-                "800F044E",
-                "dup v0.4s, w28",
+                "800F040E",
+                "dup v0.2s, w28",
             ));
             insns.push((
                 Inst::VecDup {
@@ -5199,8 +5199,8 @@ fn test_aarch64_binemit() {
                 Inst::VecLoadReplicate {
                     rd: writable_vreg(31),
                     rn: xreg(0),
                     size: VectorSize::Size64x2,
+                    flags: MemFlags::trusted(),
                 },
                 "1FCC404D",
                 "ld1r { v31.2d }, [x0]",
@@ -5210,8 +5210,8 @@ fn test_aarch64_binemit() {
                 Inst::VecLoadReplicate {
                     rd: writable_vreg(0),
                     rn: xreg(25),
                     size: VectorSize::Size8x8,
+                    flags: MemFlags::trusted(),
                 },
                 "20C3400D",
                 "ld1r { v0.8b }, [x25]",


@@ -530,17 +530,6 @@ impl Inst {
             }
         }
     }
-
-    /// Generate a LoadAddr instruction (load address of an amode into
-    /// register). Elides when possible (when amode is just a register). Returns
-    /// destination register: either `rd` or a register directly from the amode.
-    pub fn gen_load_addr(rd: Writable<Reg>, mem: AMode) -> (Reg, Option<Inst>) {
-        if let Some(r) = mem.is_reg() {
-            (r, None)
-        } else {
-            (rd.to_reg(), Some(Inst::LoadAddr { rd, mem }))
-        }
-    }
 }

 //=============================================================================
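
The deleted `gen_load_addr` survives as the two `load_addr` ISLE rules added earlier: when the addressing mode is already a bare register, that register is reused and no `LoadAddr` instruction is emitted. A rough Rust model of the elision (toy `AMode`; the temp register number is made up):

    enum AMode {
        // Base register only: the address is already in a register.
        Reg(u8),
        // Base register plus immediate offset: needs materializing.
        RegOffset(u8, i64),
    }

    fn load_addr(addr: &AMode) -> u8 {
        match addr {
            // Corresponds to the `amode_is_reg` rule: reuse the register.
            AMode::Reg(r) => *r,
            // Otherwise the real lowering allocates a temp and emits
            // `MInst.LoadAddr`; here we just return a made-up temp number.
            AMode::RegOffset(..) => 16,
        }
    }

    fn main() {
        assert_eq!(load_addr(&AMode::Reg(3)), 3);
        assert_eq!(load_addr(&AMode::RegOffset(3, 8)), 16);
    }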


@@ -165,6 +165,8 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
             preg(xreg(14)),
             preg(xreg(15)),
             // x16 and x17 are spilltmp and tmp2 (see above).
+            // x18 could be used by the platform to carry inter-procedural state;
+            // conservatively assume so and make it not allocatable.
             // x19-28 are callee-saved and so not preferred.
             // x21 is the pinned register (if enabled) and not allocatable if so.
             // x29 is FP, x30 is LR, x31 is SP/ZR.
@@ -178,30 +180,7 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
             preg(vreg(5)),
             preg(vreg(6)),
             preg(vreg(7)),
-            preg(vreg(8)),
-            preg(vreg(9)),
-            preg(vreg(10)),
-            preg(vreg(11)),
-            preg(vreg(12)),
-            preg(vreg(13)),
-            preg(vreg(14)),
-            preg(vreg(15)),
-        ],
-    ],
-    non_preferred_regs_by_class: [
-        vec![
-            preg(xreg(19)),
-            preg(xreg(20)),
-            // x21 is pinned reg if enabled; we add to this list below if not.
-            preg(xreg(22)),
-            preg(xreg(23)),
-            preg(xreg(24)),
-            preg(xreg(25)),
-            preg(xreg(26)),
-            preg(xreg(27)),
-            preg(xreg(28)),
-        ],
-        vec![
+            // v8-15 are callee-saved and so not preferred.
             preg(vreg(16)),
             preg(vreg(17)),
             preg(vreg(18)),
@@ -220,6 +199,30 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
             preg(vreg(31)),
         ],
     ],
+    non_preferred_regs_by_class: [
+        vec![
+            preg(xreg(19)),
+            preg(xreg(20)),
+            // x21 is pinned reg if enabled; we add to this list below if not.
+            preg(xreg(22)),
+            preg(xreg(23)),
+            preg(xreg(24)),
+            preg(xreg(25)),
+            preg(xreg(26)),
+            preg(xreg(27)),
+            preg(xreg(28)),
+        ],
+        vec![
+            preg(vreg(8)),
+            preg(vreg(9)),
+            preg(vreg(10)),
+            preg(vreg(11)),
+            preg(vreg(12)),
+            preg(vreg(13)),
+            preg(vreg(14)),
+            preg(vreg(15)),
+        ],
+    ],
     fixed_stack_slots: vec![],
 };


@@ -1423,7 +1423,8 @@
 ;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (bitselect c x y)))
+(rule (lower (has_type ty (bitselect c x y)))
+      (if (ty_int_bool_ref_scalar_64 ty))
       (let ((tmp1 Reg (and_reg ty x c))
             (tmp2 Reg (bic ty y c)))
         (orr ty tmp1 tmp2)))
@@ -1441,12 +1442,14 @@
 ;; T -> I{64,32,16,8}: We can simply pass through the value: values
 ;; are always stored with high bits undefined, so we can just leave
 ;; them be.
-(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (ireduce src)))
+(rule (lower (has_type ty (ireduce src)))
+      (if (ty_int_bool_ref_scalar_64 ty))
       (value_regs_get src 0))

 ;; Likewise for breduce.
-(rule (lower (has_type (ty_int_bool_ref_scalar_64 ty) (breduce src)))
+(rule (lower (has_type ty (breduce src)))
+      (if (ty_int_bool_ref_scalar_64 ty))
       (value_regs_get src 0))
@@ -1515,6 +1518,39 @@
       (let ((use_allocated_encoding bool (is_not_baldrdash_call_conv)))
         (side_effect (udf use_allocated_encoding trap_code))))

+;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(rule (lower (has_type ty (splat x @ (value_type in_ty))))
+      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (vec_dup x (vector_size ty)))
+
+(rule (lower (has_type ty (splat x @ (value_type (ty_scalar_float _)))))
+      (vec_dup_from_fpu x (vector_size ty)))
+
+(rule (lower (has_type ty (splat (bconst (u64_from_bool n)))))
+      (splat_const n (vector_size ty)))
+
+(rule (lower (has_type ty (splat (breduce (bconst (u64_from_bool n))))))
+      (splat_const n (vector_size ty)))
+
+(rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n)))))
+      (splat_const n (vector_size ty)))
+
+(rule (lower (has_type ty (splat (f64const (u64_from_ieee64 n)))))
+      (splat_const n (vector_size ty)))
+
+(rule (lower (has_type ty (splat (iconst (u64_from_imm64 n)))))
+      (splat_const n (vector_size ty)))
+
+(rule (lower (has_type ty (splat (ireduce (iconst (u64_from_imm64 n))))))
+      (splat_const n (vector_size ty)))
+
+(rule (lower (has_type ty (splat x @ (load flags _addr offset))))
+      (if-let mem_op (is_sinkable_inst x))
+      (let ((_ Unit (sink_inst mem_op))
+            (addr AMode (amode (lane_type ty) mem_op offset))
+            (address Reg (load_addr addr)))
+        (ld1r address (vector_size ty) flags)))
+
 ;;;; Rules for `AtomicLoad` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type (valid_atomic_transaction ty) (atomic_load flags addr)))
@@ -1527,7 +1563,6 @@
         addr))
   (side_effect (store_release ty src addr)))
-
 ;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule 1 (lower (and (use_lse)
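
All of the constant-splat rules above funnel into `splat_const` with the constant's raw bits as a `u64`. Plain-Rust stand-ins for the bit extraction that the `u64_from_bool`, `u64_from_ieee32`, and `u64_from_ieee64` extractors perform (a sketch, not cranelift's code):

    // CLIF booleans materialize as all-ones masks, so `bconst true` splats
    // to an all-ones vector; floats pass through as IEEE bit patterns.
    fn bits_of_bconst(b: bool) -> u64 {
        if b { u64::MAX } else { 0 }
    }

    fn bits_of_f32(x: f32) -> u64 {
        x.to_bits() as u64
    }

    fn bits_of_f64(x: f64) -> u64 {
        x.to_bits()
    }

    fn main() {
        assert_eq!(bits_of_bconst(true), u64::MAX);
        assert_eq!(bits_of_f32(1.0), 0x3f80_0000);
        assert_eq!(bits_of_f64(2.0), 0x4000_0000_0000_0000);
    }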


@@ -5,12 +5,13 @@ pub mod generated_code;
 // Types that the generated ISLE code uses via `use super::*`.
 use super::{
-    writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo,
-    CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
+    insn_inputs, writable_zero_reg, zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget,
+    CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, FPUOpRI, FloatCC, Imm12, ImmLogic, ImmShift,
     Inst as MInst, IntCC, JTSequenceInfo, MachLabel, MoveWideConst, MoveWideOp, NarrowValueMode,
     Opcode, OperandSize, PairAMode, Reg, ScalarSize, ShiftOpAndAmt, UImm5, VecMisc2, VectorSize,
     NZCV,
 };
+use crate::isa::aarch64::lower::{lower_address, lower_splat_const};
 use crate::isa::aarch64::settings::Flags as IsaFlags;
 use crate::machinst::{isle::*, InputSourceInst};
 use crate::settings::Flags;
@@ -442,4 +443,25 @@ where
             _ => panic!(),
         }
     }
+
+    fn amode(&mut self, ty: Type, mem_op: Inst, offset: u32) -> AMode {
+        lower_address(
+            self.lower_ctx,
+            ty,
+            &insn_inputs(self.lower_ctx, mem_op)[..],
+            offset as i32,
+        )
+    }
+
+    fn amode_is_reg(&mut self, address: &AMode) -> Option<Reg> {
+        address.is_reg()
+    }
+
+    fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg {
+        let rd = self.temp_writable_reg(I8X16);
+        lower_splat_const(self.lower_ctx, rd, value, *size);
+        rd.to_reg()
+    }
 }


@@ -741,80 +741,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             }
         }
-        Opcode::Splat => {
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let ty = ty.unwrap();
-            // TODO: Handle SVE Dup.
-            let ty = if ty.is_dynamic_vector() {
-                dynamic_to_fixed(ty)
-            } else {
-                ty
-            };
-            let size = VectorSize::from_ty(ty);
-
-            if let Some((_, insn)) = maybe_input_insn_multi(
-                ctx,
-                inputs[0],
-                &[
-                    Opcode::Bconst,
-                    Opcode::F32const,
-                    Opcode::F64const,
-                    Opcode::Iconst,
-                ],
-            ) {
-                lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
-            } else if let Some(insn) =
-                maybe_input_insn_via_conv(ctx, inputs[0], Opcode::Iconst, Opcode::Ireduce)
-            {
-                lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
-            } else if let Some(insn) =
-                maybe_input_insn_via_conv(ctx, inputs[0], Opcode::Bconst, Opcode::Breduce)
-            {
-                lower_splat_const(ctx, rd, ctx.get_constant(insn).unwrap(), size);
-            } else if let Some((_, insn)) = maybe_input_insn_multi(
-                ctx,
-                inputs[0],
-                &[
-                    Opcode::Uload8,
-                    Opcode::Sload8,
-                    Opcode::Uload16,
-                    Opcode::Sload16,
-                    Opcode::Uload32,
-                    Opcode::Sload32,
-                    Opcode::Load,
-                ],
-            ) {
-                ctx.sink_inst(insn);
-                let load_inputs = insn_inputs(ctx, insn);
-                let load_outputs = insn_outputs(ctx, insn);
-                lower_load(
-                    ctx,
-                    insn,
-                    &load_inputs[..],
-                    load_outputs[0],
-                    |ctx, _rd, _elem_ty, mem| {
-                        let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
-                        let (addr, addr_inst) = Inst::gen_load_addr(tmp, mem);
-                        if let Some(addr_inst) = addr_inst {
-                            ctx.emit(addr_inst);
-                        }
-                        ctx.emit(Inst::VecLoadReplicate { rd, rn: addr, size });
-                        Ok(())
-                    },
-                )?;
-            } else {
-                let input_ty = ctx.input_ty(insn, 0);
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let inst = if ty_has_int_representation(input_ty) {
-                    Inst::VecDup { rd, rn, size }
-                } else {
-                    Inst::VecDupFromFpu { rd, rn, size }
-                };
-
-                ctx.emit(inst);
-            }
-        }
+        Opcode::Splat => implemented_in_isle(ctx),

         Opcode::ScalarToVector => implemented_in_isle(ctx),


@@ -868,7 +868,7 @@
   ;; Pseudoinstruction to keep a value alive.
   (DummyUse
     (reg Reg))

   ;; An unwind pseudoinstruction describing the state of the
   ;; machine at this program point.
   (Unwind
@@ -1641,15 +1641,6 @@
 (decl sinkable_inst (Inst) Value)
 (extern extractor sinkable_inst sinkable_inst)

-;; Sink a sinkable instruction.
-;;
-;; This is a side-effectful operation that notifies the context that the
-;; sinkable instruction been sunk into another instruction, and no longer
-;; needs to be lowered.
-(decl sink_inst (Inst) Unit)
-(extern constructor sink_inst sink_inst)
-
 ;; Sinkable big-endian load instruction.
 (decl sinkable_load (Inst) Value)
 (extractor (sinkable_load inst)


@@ -1656,8 +1656,9 @@
 ;; Insert vector lane from general-purpose register.
 (rule (lower (insertlane x @ (value_type ty)
-                         y @ (value_type (ty_int_bool_ref_scalar_64 _))
+                         y @ (value_type in_ty)
                          (u8_from_uimm8 idx)))
+      (if (ty_int_bool_ref_scalar_64 in_ty))
       (vec_insert_lane ty x y (be_lane_idx ty idx) (zero_reg)))

 ;; Insert vector lane from floating-point register.
@@ -1771,8 +1772,9 @@
 ;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Extract vector lane to general-purpose register.
-(rule (lower (has_type (ty_int_bool_ref_scalar_64 _)
+(rule (lower (has_type out_ty
              (extractlane x @ (value_type ty) (u8_from_uimm8 idx))))
+      (if (ty_int_bool_ref_scalar_64 out_ty))
       (vec_extract_lane ty x (be_lane_idx ty idx) (zero_reg)))

 ;; Extract vector lane to floating-point register.
@@ -1828,8 +1830,8 @@
 ;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Load replicated value from general-purpose register.
-(rule (lower (has_type ty (splat
-                           x @ (value_type (ty_int_bool_ref_scalar_64 _)))))
+(rule (lower (has_type ty (splat x @ (value_type in_ty))))
+      (if (ty_int_bool_ref_scalar_64 in_ty))
       (vec_replicate_lane ty (vec_insert_lane_undef ty x 0 (zero_reg)) 0))

 ;; Load replicated value from floating-point register.
@@ -1888,7 +1890,8 @@
 ;; Load scalar value from general-purpose register.
 (rule (lower (has_type ty (scalar_to_vector
-                           x @ (value_type (ty_int_bool_ref_scalar_64 _)))))
+                           x @ (value_type in_ty))))
+      (if (ty_int_bool_ref_scalar_64 in_ty))
       (vec_insert_lane ty (vec_imm ty 0) x (be_lane_idx ty 0) (zero_reg)))

 ;; Load scalar value from floating-point register.


@@ -666,11 +666,6 @@ where
         None
     }

-    #[inline]
-    fn sink_inst(&mut self, inst: Inst) -> Unit {
-        self.lower_ctx.sink_inst(inst);
-    }
-
     #[inline]
     fn emit(&mut self, inst: &MInst) -> Unit {
         self.lower_ctx.emit(inst.clone());


@@ -11,7 +11,9 @@ pub use crate::ir::{
     SigRef, StackSlot,
 };
 pub use crate::isa::unwind::UnwindInst;
-pub use crate::machinst::{ABIArg, ABIArgSlot, ABISig, RealReg, Reg, RelocDistance, Writable};
+pub use crate::machinst::{
+    ABIArg, ABIArgSlot, ABISig, InputSourceInst, RealReg, Reg, RelocDistance, Writable,
+};

 pub type Unit = ();
 pub type ValueSlice = (ValueList, usize);
@@ -425,6 +427,15 @@ macro_rules! isle_prelude_methods {
             imm.bits() as u64
         }

+        #[inline]
+        fn u64_from_bool(&mut self, b: bool) -> u64 {
+            if b {
+                u64::MAX
+            } else {
+                0
+            }
+        }
+
         #[inline]
         fn inst_results(&mut self, inst: Inst) -> ValueSlice {
             (self.lower_ctx.dfg().inst_results_list(inst), 0)
@@ -854,6 +865,21 @@ macro_rules! isle_prelude_methods {
         fn real_reg_to_writable_reg(&mut self, reg: RealReg) -> WritableReg {
             Writable::from_reg(Reg::from(reg))
         }
+
+        fn is_sinkable_inst(&mut self, val: Value) -> Option<Inst> {
+            let input = self.lower_ctx.get_value_as_source_or_const(val);
+
+            if let InputSourceInst::UniqueUse(inst, _) = input.inst {
+                Some(inst)
+            } else {
+                None
+            }
+        }
+
+        #[inline]
+        fn sink_inst(&mut self, inst: Inst) {
+            self.lower_ctx.sink_inst(inst);
+        }
     };
 }
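
`is_sinkable_inst` deliberately matches only `InputSourceInst::UniqueUse`: if the loaded value had another consumer, sinking the load into the splat would either duplicate the memory access or starve the other use. A toy illustration of that guard (illustrative names, not cranelift's types):

    // `u32` stands in for an instruction handle.
    enum InputSource {
        UniqueUse(u32), // single consumer: safe to sink
        Use(u32),       // multiple consumers: must stay materialized
    }

    fn sinkable(src: &InputSource) -> Option<u32> {
        match src {
            InputSource::UniqueUse(inst) => Some(*inst),
            InputSource::Use(_) => None,
        }
    }

    fn main() {
        assert_eq!(sinkable(&InputSource::UniqueUse(7)), Some(7));
        assert_eq!(sinkable(&InputSource::Use(7)), None);
    }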


@@ -308,10 +308,10 @@
 (decl fits_in_64 (Type) Type)
 (extern extractor fits_in_64 fits_in_64)

-;; An extractor that only matches scalar booleans, integers, and references that
-;; can fit in 64 bits.
-(decl ty_int_bool_ref_scalar_64 (Type) Type)
-(extern extractor ty_int_bool_ref_scalar_64 ty_int_bool_ref_scalar_64)
+;; A pure constructor that only matches scalar booleans, integers, and
+;; references that can fit in 64 bits.
+(decl pure ty_int_bool_ref_scalar_64 (Type) Type)
+(extern constructor ty_int_bool_ref_scalar_64 ty_int_bool_ref_scalar_64)

 ;; An extractor that matches 32- and 64-bit types only.
 (decl ty_32_or_64 (Type) Type)
@@ -407,6 +407,10 @@
 (decl u8_from_uimm8 (u8) Uimm8)
 (extern extractor infallible u8_from_uimm8 u8_from_uimm8)

+;; Extract a `u64` from a `bool`.
+(decl u64_from_bool (u64) bool)
+(extern extractor infallible u64_from_bool u64_from_bool)
+
 ;; Extract a `u64` from an `Imm64`.
 (decl u64_from_imm64 (u64) Imm64)
 (extern extractor infallible u64_from_imm64 u64_from_imm64)
@@ -498,6 +502,10 @@
 (decl pure zero_value (Value) Value)
 (extern constructor zero_value zero_value)

+;; Match a sinkable instruction from a value operand.
+(decl pure is_sinkable_inst (Value) Inst)
+(extern constructor is_sinkable_inst is_sinkable_inst)
+
 ;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; Emit an instruction.
@@ -508,6 +516,14 @@
 (decl emit (MInst) Unit)
 (extern constructor emit emit)

+;; Sink an instruction.
+;;
+;; This is a side-effectful operation that notifies the context that the
+;; instruction has been sunk into another instruction, and no longer needs to
+;; be lowered.
+(decl sink_inst (Inst) Unit)
+(extern constructor sink_inst sink_inst)
+
 ;; Constant pool emission.
 (type VCodeConstant (primitive VCodeConstant))
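
`ty_int_bool_ref_scalar_64` changes from an extractor (matched in pattern position) to a pure constructor (called under an `(if ...)` guard). Behaviorally it is a fallible pure function over types; a simplified sketch with the type reduced to a width/vector pair:

    // Returns the type back (here, its bit width) when it is a scalar that
    // fits in 64 bits, and None otherwise -- the guard form used by the
    // rewritten rules.
    fn ty_int_bool_ref_scalar_64(bits: u32, is_vector: bool) -> Option<u32> {
        if !is_vector && bits <= 64 {
            Some(bits)
        } else {
            None
        }
    }

    fn main() {
        assert_eq!(ty_int_bool_ref_scalar_64(32, false), Some(32));
        assert_eq!(ty_int_bool_ref_scalar_64(128, false), None);
        assert_eq!(ty_int_bool_ref_scalar_64(32, true), None);
    }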


@@ -244,18 +244,13 @@ block0(v0: i128):
     return v1
 }

-; stp fp, lr, [sp, #-16]!
-; mov fp, sp
-; stp d11, d13, [sp, #-16]!
 ; block0:
 ; fmov d6, x0
 ; mov v6.d[1], x1
-; cnt v11.16b, v6.16b
-; addv b13, v11.16b
-; umov w0, v13.b[0]
+; cnt v19.16b, v6.16b
+; addv b21, v19.16b
+; umov w0, v21.b[0]
 ; movz w1, #0
-; ldp d11, d13, [sp], #16
-; ldp fp, lr, [sp], #16
 ; ret

 function %d(i64) -> i64 {


@@ -15,9 +15,9 @@ block0(v0: i16):
 }

 ; block0:
-; dup v2.4h, w0
-; mov v7.16b, v2.16b
-; mov v7.d[1], v2.d[0]
+; dup v6.4h, w0
+; mov v7.16b, v6.16b
+; mov v7.d[1], v6.d[0]
 ; sqxtn v0.8b, v7.8h
 ; ret

@@ -35,9 +35,9 @@ block0(v0: i16):
 }

 ; block0:
-; dup v2.8h, w0
-; sqxtn v0.8b, v2.8h
-; sqxtn2 v0.16b, v2.8h
+; dup v6.8h, w0
+; sqxtn v0.8b, v6.8h
+; sqxtn2 v0.16b, v6.8h
 ; ret

 function %snarrow_i32x2(i32) -> i16x4 {
@@ -54,9 +54,9 @@ block0(v0: i32):
 }

 ; block0:
-; dup v2.2s, w0
-; mov v7.16b, v2.16b
-; mov v7.d[1], v2.d[0]
+; dup v6.2s, w0
+; mov v7.16b, v6.16b
+; mov v7.d[1], v6.d[0]
 ; sqxtn v0.4h, v7.4s
 ; ret

@@ -74,9 +74,9 @@ block0(v0: i32):
 }

 ; block0:
-; dup v2.4s, w0
-; sqxtn v0.4h, v2.4s
-; sqxtn2 v0.8h, v2.4s
+; dup v6.4s, w0
+; sqxtn v0.4h, v6.4s
+; sqxtn2 v0.8h, v6.4s
 ; ret

 function %snarrow_i64x2(i64) -> i32x4 {
@@ -93,9 +93,9 @@ block0(v0: i64):
 }

 ; block0:
-; dup v2.2d, x0
-; sqxtn v0.2s, v2.2d
-; sqxtn2 v0.4s, v2.2d
+; dup v6.2d, x0
+; sqxtn v0.2s, v6.2d
+; sqxtn2 v0.4s, v6.2d
 ; ret

 function %unarrow_i16x4(i16) -> i8x8 {
@@ -112,9 +112,9 @@ block0(v0: i16):
 }

 ; block0:
-; dup v2.4h, w0
-; mov v7.16b, v2.16b
-; mov v7.d[1], v2.d[0]
+; dup v6.4h, w0
+; mov v7.16b, v6.16b
+; mov v7.d[1], v6.d[0]
 ; sqxtun v0.8b, v7.8h
 ; ret

@@ -132,9 +132,9 @@ block0(v0: i16):
 }

 ; block0:
-; dup v2.8h, w0
-; sqxtun v0.8b, v2.8h
-; sqxtun2 v0.16b, v2.8h
+; dup v6.8h, w0
+; sqxtun v0.8b, v6.8h
+; sqxtun2 v0.16b, v6.8h
 ; ret

 function %unarrow_i32x2(i32) -> i16x4 {
@@ -151,9 +151,9 @@ block0(v0: i32):
 }

 ; block0:
-; dup v2.2s, w0
-; mov v7.16b, v2.16b
-; mov v7.d[1], v2.d[0]
+; dup v6.2s, w0
+; mov v7.16b, v6.16b
+; mov v7.d[1], v6.d[0]
 ; sqxtun v0.4h, v7.4s
 ; ret

@@ -171,9 +171,9 @@ block0(v0: i32):
 }

 ; block0:
-; dup v2.4s, w0
-; sqxtun v0.4h, v2.4s
-; sqxtun2 v0.8h, v2.4s
+; dup v6.4s, w0
+; sqxtun v0.4h, v6.4s
+; sqxtun2 v0.8h, v6.4s
 ; ret

 function %unarrow_i64x2(i64) -> i32x4 {
@@ -190,9 +190,9 @@ block0(v0: i64):
 }

 ; block0:
-; dup v2.2d, x0
-; sqxtun v0.2s, v2.2d
-; sqxtun2 v0.4s, v2.2d
+; dup v6.2d, x0
+; sqxtun v0.2s, v6.2d
+; sqxtun2 v0.4s, v6.2d
 ; ret

 function %uunarrow_i16x4(i16) -> i8x8 {
@@ -209,9 +209,9 @@ block0(v0: i16):
 }

 ; block0:
-; dup v2.4h, w0
-; mov v7.16b, v2.16b
-; mov v7.d[1], v2.d[0]
+; dup v6.4h, w0
+; mov v7.16b, v6.16b
+; mov v7.d[1], v6.d[0]
 ; uqxtn v0.8b, v7.8h
 ; ret

@@ -229,9 +229,9 @@ block0(v0: i16):
 }

 ; block0:
-; dup v2.8h, w0
-; uqxtn v0.8b, v2.8h
-; uqxtn2 v0.16b, v2.8h
+; dup v6.8h, w0
+; uqxtn v0.8b, v6.8h
+; uqxtn2 v0.16b, v6.8h
 ; ret

 function %uunarrow_i32x2(i32) -> i16x4 {
@@ -248,9 +248,9 @@ block0(v0: i32):
 }

 ; block0:
-; dup v2.2s, w0
-; mov v7.16b, v2.16b
-; mov v7.d[1], v2.d[0]
+; dup v6.2s, w0
+; mov v7.16b, v6.16b
+; mov v7.d[1], v6.d[0]
 ; uqxtn v0.4h, v7.4s
 ; ret

@@ -268,9 +268,9 @@ block0(v0: i32):
 }

 ; block0:
-; dup v2.4s, w0
-; uqxtn v0.4h, v2.4s
-; uqxtn2 v0.8h, v2.4s
+; dup v6.4s, w0
+; uqxtn v0.4h, v6.4s
+; uqxtn2 v0.8h, v6.4s
 ; ret

 function %uunarrow_i64x2(i64) -> i32x4 {
@@ -287,8 +287,7 @@ block0(v0: i64):
 }

 ; block0:
-; dup v2.2d, x0
-; uqxtn v0.2s, v2.2d
-; uqxtn2 v0.4s, v2.2d
+; dup v6.2d, x0
+; uqxtn v0.2s, v6.2d
+; uqxtn2 v0.4s, v6.2d
 ; ret


@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target aarch64

 function %i8x16_splat_add(i8, i8) -> i8x16 {
@@ -13,10 +13,11 @@ block0(v0: i8, v1: i8):
     return v5
 }

-; check: dup v4.16b, w0
-; nextln: dup v6.16b, w1
-; nextln: add v0.16b, v4.16b, v6.16b
-; nextln: ret
+; block0:
+; dup v16.16b, w0
+; dup v17.16b, w1
+; add v0.16b, v16.16b, v17.16b
+; ret

 function %i16x8_splat_add(i16, i16) -> i16x8 {
 gv0 = dyn_scale_target_const.i16x8
@@ -30,10 +31,11 @@ block0(v0: i16, v1: i16):
     return v5
 }

-; check: dup v4.8h, w0
-; nextln: dup v6.8h, w1
-; nextln: add v0.8h, v4.8h, v6.8h
-; nextln: ret
+; block0:
+; dup v16.8h, w0
+; dup v17.8h, w1
+; add v0.8h, v16.8h, v17.8h
+; ret

 function %i32x4_splat_mul(i32, i32) -> i32x4 {
 gv0 = dyn_scale_target_const.i32x4
@@ -47,10 +49,11 @@ block0(v0: i32, v1: i32):
     return v5
 }

-; check: dup v4.4s, w0
-; nextln: dup v6.4s, w1
-; nextln: mul v0.4s, v4.4s, v6.4s
-; nextln: ret
+; block0:
+; dup v16.4s, w0
+; dup v17.4s, w1
+; mul v0.4s, v16.4s, v17.4s
+; ret

 function %i64x2_splat_sub(i64, i64) -> i64x2 {
 gv0 = dyn_scale_target_const.i64x2
@@ -64,10 +67,11 @@ block0(v0: i64, v1: i64):
     return v5
 }

-; check: dup v4.2d, x0
-; nextln: dup v6.2d, x1
-; nextln: sub v0.2d, v4.2d, v6.2d
-; nextln: ret
+; block0:
+; dup v16.2d, x0
+; dup v17.2d, x1
+; sub v0.2d, v16.2d, v17.2d
+; ret

 function %f32x4_splat_add(f32, f32) -> f32x4 {
 gv0 = dyn_scale_target_const.f32x4
@@ -81,10 +85,11 @@ block0(v0: f32, v1: f32):
     return v5
 }

-; check: dup v4.4s, v0.s[0]
-; nextln: dup v6.4s, v1.s[0]
-; nextln: fadd v0.4s, v4.4s, v6.4s
-; nextln: ret
+; block0:
+; dup v16.4s, v0.s[0]
+; dup v17.4s, v1.s[0]
+; fadd v0.4s, v16.4s, v17.4s
+; ret

 function %f64x2_splat_sub(f64, f64) -> f64x2 {
 gv0 = dyn_scale_target_const.f64x2
@@ -98,10 +103,11 @@ block0(v0: f64, v1: f64):
     return v5
 }

-; check: dup v4.2d, v0.d[0]
-; nextln: dup v6.2d, v1.d[0]
-; nextln: fsub v0.2d, v4.2d, v6.2d
-; nextln: ret
+; block0:
+; dup v16.2d, v0.d[0]
+; dup v17.2d, v1.d[0]
+; fsub v0.2d, v16.2d, v17.2d
+; ret

 function %f64x2_splat_mul(f64, f64) -> f64x2 {
 gv0 = dyn_scale_target_const.f64x2
@@ -115,10 +121,11 @@ block0(v0: f64, v1: f64):
     return v5
 }

-; check: dup v4.2d, v0.d[0]
-; nextln: dup v6.2d, v1.d[0]
-; nextln: fmul v0.2d, v4.2d, v6.2d
-; nextln: ret
+; block0:
+; dup v16.2d, v0.d[0]
+; dup v17.2d, v1.d[0]
+; fmul v0.2d, v16.2d, v17.2d
+; ret

 function %f64x2_splat_div(f64, f64) -> f64x2 {
 gv0 = dyn_scale_target_const.f64x2
@@ -132,10 +139,11 @@ block0(v0: f64, v1: f64):
     return v5
 }

-; check: dup v4.2d, v0.d[0]
-; nextln: dup v6.2d, v1.d[0]
-; nextln: fdiv v0.2d, v4.2d, v6.2d
-; nextln: ret
+; block0:
+; dup v16.2d, v0.d[0]
+; dup v17.2d, v1.d[0]
+; fdiv v0.2d, v16.2d, v17.2d
+; ret

 function %f64x2_splat_min(f64, f64) -> f64x2 {
 gv0 = dyn_scale_target_const.f64x2
@@ -149,10 +157,11 @@ block0(v0: f64, v1: f64):
     return v5
 }

-; check: dup v4.2d, v0.d[0]
-; nextln: dup v6.2d, v1.d[0]
-; nextln: fmin v0.2d, v4.2d, v6.2d
-; nextln: ret
+; block0:
+; dup v16.2d, v0.d[0]
+; dup v17.2d, v1.d[0]
+; fmin v0.2d, v16.2d, v17.2d
+; ret

 function %f64x2_splat_max(f64, f64) -> f64x2 {
 gv0 = dyn_scale_target_const.f64x2
@@ -166,10 +175,11 @@ block0(v0: f64, v1: f64):
     return v5
 }

-; check: dup v4.2d, v0.d[0]
-; nextln: dup v6.2d, v1.d[0]
-; nextln: fmax v0.2d, v4.2d, v6.2d
-; nextln: ret
+; block0:
+; dup v16.2d, v0.d[0]
+; dup v17.2d, v1.d[0]
+; fmax v0.2d, v16.2d, v17.2d
+; ret

 function %f64x2_splat_min_pseudo(f64, f64) -> f64x2 {
 gv0 = dyn_scale_target_const.f64x2
@@ -183,11 +193,12 @@ block0(v0: f64, v1: f64):
     return v5
 }

-; check: dup v4.2d, v0.d[0]
-; nextln: dup v6.2d, v1.d[0]
-; nextln: fcmgt v0.2d, v4.2d, v6.2d
-; nextln: bsl v0.16b, v6.16b, v4.16b
-; nextln: ret
+; block0:
+; dup v17.2d, v0.d[0]
+; dup v18.2d, v1.d[0]
+; fcmgt v0.2d, v17.2d, v18.2d
+; bsl v0.16b, v18.16b, v17.16b
+; ret

 function %f64x2_splat_max_pseudo(f64, f64) -> f64x2 {
 gv0 = dyn_scale_target_const.f64x2
@@ -201,8 +212,9 @@ block0(v0: f64, v1: f64):
     return v5
 }

-; check: dup v4.2d, v0.d[0]
-; nextln: dup v6.2d, v1.d[0]
-; nextln: fcmgt v0.2d, v6.2d, v4.2d
-; nextln: bsl v0.16b, v6.16b, v4.16b
-; nextln: ret
+; block0:
+; dup v17.2d, v0.d[0]
+; dup v18.2d, v1.d[0]
+; fcmgt v0.2d, v18.2d, v17.2d
+; bsl v0.16b, v18.16b, v17.16b
+; ret


@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target aarch64

 function %swidenhigh_i8x16(i8) -> i16x8 {
@@ -14,9 +14,10 @@ block0(v0: i8):
     return v3
 }

-; check: dup v2.16b, w0
-; nextln: sxtl2 v0.8h, v2.16b
-; nextln: ret
+; block0:
+; dup v5.16b, w0
+; sxtl2 v0.8h, v5.16b
+; ret

 function %swidenhigh_i16x8(i16) -> i32x4 {
 gv0 = dyn_scale_target_const.i32x4
@@ -31,9 +32,10 @@ block0(v0: i16):
     return v3
 }

-; check: dup v2.8h, w0
-; nextln: sxtl2 v0.4s, v2.8h
-; nextln: ret
+; block0:
+; dup v5.8h, w0
+; sxtl2 v0.4s, v5.8h
+; ret

 function %swidenhigh_i32x4(i32) -> i64x2 {
 gv0 = dyn_scale_target_const.i32x4
@@ -48,9 +50,10 @@ block0(v0: i32):
     return v3
 }

-; check: dup v2.4s, w0
-; nextln: sxtl2 v0.2d, v2.4s
-; nextln: ret
+; block0:
+; dup v5.4s, w0
+; sxtl2 v0.2d, v5.4s
+; ret

 function %swidenlow_i8x16(i8) -> i16x8 {
 gv0 = dyn_scale_target_const.i16x8
@@ -65,9 +68,10 @@ block0(v0: i8):
     return v3
 }

-; check: dup v2.16b, w0
-; nextln: sxtl v0.8h, v2.8b
-; nextln: ret
+; block0:
+; dup v5.16b, w0
+; sxtl v0.8h, v5.8b
+; ret

 function %swidenlow_i16x8(i16) -> i32x4 {
 gv0 = dyn_scale_target_const.i32x4
@@ -82,9 +86,10 @@ block0(v0: i16):
     return v3
 }

-; check: dup v2.8h, w0
-; nextln: sxtl v0.4s, v2.4h
-; nextln: ret
+; block0:
+; dup v5.8h, w0
+; sxtl v0.4s, v5.4h
+; ret

 function %swidenlow_i32x4(i32) -> i64x2 {
 gv0 = dyn_scale_target_const.i32x4
@@ -99,6 +104,7 @@ block0(v0: i32):
     return v3
 }

-; check: dup v2.4s, w0
-; nextln: sxtl v0.2d, v2.2s
-; nextln: ret
+; block0:
+; dup v5.4s, w0
+; sxtl v0.2d, v5.2s
+; ret


@@ -58,9 +58,9 @@ block0(v0: i32):
 ; mov fp, sp
 ; sub sp, sp, #16
 ; block0:
-; dup v2.4s, w0
-; mov x4, sp
-; str q2, [x4]
+; dup v3.4s, w0
+; mov x3, sp
+; str q3, [x3]
 ; add sp, sp, #16
 ; ldp fp, lr, [sp], #16
 ; ret

@@ -101,9 +101,9 @@ block0(v0: i32):
 ; mov fp, sp
 ; sub sp, sp, #16
 ; block0:
-; dup v2.4s, w0
-; mov x4, sp
-; str q2, [x4]
+; dup v3.4s, w0
+; mov x3, sp
+; str q3, [x3]
 ; add sp, sp, #16
 ; ldp fp, lr, [sp], #16
 ; ret


@@ -82,29 +82,6 @@ block0(v0: f64):
 ; stp d10, d11, [sp, #-16]!
 ; stp d8, d9, [sp, #-16]!
 ; block0:
-; fadd d1, d0, d0
-; fadd d2, d0, d0
-; fadd d3, d0, d0
-; fadd d4, d0, d0
-; fadd d5, d0, d0
-; fadd d6, d0, d0
-; fadd d7, d0, d0
-; fadd d8, d0, d0
-; fadd d9, d0, d0
-; fadd d10, d0, d0
-; fadd d11, d0, d0
-; fadd d12, d0, d0
-; fadd d13, d0, d0
-; fadd d14, d0, d0
-; fadd d15, d0, d0
-; fadd d16, d0, d0
-; fadd d17, d0, d0
-; fadd d18, d0, d0
-; fadd d19, d0, d0
-; fadd d20, d0, d0
-; fadd d21, d0, d0
-; fadd d22, d0, d0
-; fadd d23, d0, d0
 ; fadd d24, d0, d0
 ; fadd d25, d0, d0
 ; fadd d26, d0, d0
@@ -113,37 +90,60 @@ block0(v0: f64):
 ; fadd d29, d0, d0
 ; fadd d30, d0, d0
 ; fadd d31, d0, d0
-; fadd d0, d0, d1
-; fadd d1, d2, d3
-; fadd d2, d4, d5
-; fadd d3, d6, d7
+; fadd d1, d0, d0
+; fadd d2, d0, d0
+; fadd d3, d0, d0
+; fadd d4, d0, d0
+; fadd d5, d0, d0
+; fadd d6, d0, d0
+; fadd d7, d0, d0
+; fadd d16, d0, d0
+; fadd d17, d0, d0
+; fadd d18, d0, d0
+; fadd d19, d0, d0
+; fadd d20, d0, d0
+; fadd d21, d0, d0
+; fadd d22, d0, d0
+; fadd d23, d0, d0
+; fadd d8, d0, d0
+; fadd d9, d0, d0
+; fadd d10, d0, d0
+; fadd d11, d0, d0
+; fadd d12, d0, d0
+; fadd d13, d0, d0
+; fadd d14, d0, d0
+; fadd d15, d0, d0
+; fadd d24, d0, d24
+; fadd d25, d25, d26
+; fadd d26, d27, d28
+; fadd d27, d29, d30
+; fadd d28, d31, d1
+; fadd d29, d2, d3
+; fadd d30, d4, d5
+; fadd d31, d6, d7
+; fadd d0, d16, d17
+; fadd d1, d18, d19
+; fadd d2, d20, d21
+; fadd d3, d22, d23
 ; fadd d4, d8, d9
 ; fadd d5, d10, d11
 ; fadd d6, d12, d13
 ; fadd d7, d14, d15
-; fadd d8, d16, d17
-; fadd d9, d18, d19
-; fadd d10, d20, d21
-; fadd d11, d22, d23
-; fadd d12, d24, d25
-; fadd d13, d26, d27
-; fadd d14, d28, d29
-; fadd d15, d30, d31
-; fadd d0, d0, d1
-; fadd d1, d2, d3
-; fadd d2, d4, d5
-; fadd d3, d6, d7
-; fadd d4, d8, d9
-; fadd d5, d10, d11
-; fadd d6, d12, d13
-; fadd d7, d14, d15
-; fadd d0, d0, d1
-; fadd d1, d2, d3
-; fadd d2, d4, d5
-; fadd d3, d6, d7
-; fadd d0, d0, d1
-; fadd d1, d2, d3
-; fadd d0, d0, d1
+; fadd d24, d24, d25
+; fadd d25, d26, d27
+; fadd d26, d28, d29
+; fadd d27, d30, d31
+; fadd d28, d0, d1
+; fadd d29, d2, d3
+; fadd d30, d4, d5
+; fadd d31, d6, d7
+; fadd d24, d24, d25
+; fadd d25, d26, d27
+; fadd d26, d28, d29
+; fadd d27, d30, d31
+; fadd d24, d24, d25
+; fadd d25, d26, d27
+; fadd d0, d24, d25
 ; ldp d8, d9, [sp], #16
 ; ldp d10, d11, [sp], #16
 ; ldp d12, d13, [sp], #16
@@ -242,4 +242,3 @@ block0(v0: i64):
 ; ldr x28, [sp], #16
 ; ldp fp, lr, [sp], #16
 ; ret
-


@@ -1,4 +1,4 @@
-test interpret
+; test interpret TODO: Not yet implemented
 test run
 target aarch64
 target s390x
@@ -10,6 +10,8 @@ block0(v0: i8):
     v1 = splat.i8x16 v0
     return v1
 }
+; run: %splat_i8x16(-1) == [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
+; run: %splat_i8x16(0) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 ; run: %splat_i8x16(1) == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

 function %splat_i16x8(i16) -> i16x8 {
@@ -17,6 +19,8 @@ block0(v0: i16):
     v1 = splat.i16x8 v0
     return v1
 }
+; run: %splat_i16x8(-1) == [-1 -1 -1 -1 -1 -1 -1 -1]
+; run: %splat_i16x8(0) == [0 0 0 0 0 0 0 0]
 ; run: %splat_i16x8(512) == [512 512 512 512 512 512 512 512]

 function %splat_i32x4(i32) -> i32x4 {
@@ -24,6 +28,8 @@ block0(v0: i32):
     v1 = splat.i32x4 v0
     return v1
 }
+; run: %splat_i32x4(-1) == [-1 -1 -1 -1]
+; run: %splat_i32x4(0) == [0 0 0 0]
 ; run: %splat_i32x4(2000000) == [2000000 2000000 2000000 2000000]

 function %splat_i64x2(i64) -> i64x2 {
@@ -31,4 +37,189 @@ block0(v0: i64):
     v1 = splat.i64x2 v0
     return v1
 }
+; run: %splat_i64x2(-1) == [-1 -1]
+; run: %splat_i64x2(0) == [0 0]
 ; run: %splat_i64x2(5000000000) == [5000000000 5000000000]
+
+function %splat_f32x4(f32) -> f32x4 {
+block0(v0: f32):
+    v1 = splat.f32x4 v0
+    return v1
+}
+; run: %splat_f32x4(-0x0.0) == [-0x0.0 -0x0.0 -0x0.0 -0x0.0]
+; run: %splat_f32x4(0x1.0) == [0x1.0 0x1.0 0x1.0 0x1.0]
+; run: %splat_f32x4(NaN) == [NaN NaN NaN NaN]
+
+function %splat_f64x2(f64) -> f64x2 {
+block0(v0: f64):
+    v1 = splat.f64x2 v0
+    return v1
+}
+; run: %splat_f64x2(0x0.0) == [0x0.0 0x0.0]
+; run: %splat_f64x2(0x2.0) == [0x2.0 0x2.0]
+; run: %splat_f64x2(NaN) == [NaN NaN]
+
+; TODO: Test combinations of `bconst` and `splat`, potentially with `breduce` in
+; the middle
+
+function %splat_i8x16_2(i8x16) -> i8x16 {
+block0(v0: i8x16):
+    v1 = iconst.i8 116
+    v2 = splat.i8x16 v1
+    v3 = iadd v0, v2
+    return v3
+}
+; run: %splat_i8x16_2([-128 -101 -75 -59 -22 -12 -7 -1 0 3 17 34 68 92 111 127]) == [-12 15 41 57 94 104 109 115 116 119 -123 -106 -72 -48 -29 -13]
+
+function %splat_i8x16_3(i8x16) -> i8x16 {
+block0(v0: i8x16):
+    v1 = iconst.i16 116
+    v2 = ireduce.i8 v1
+    v3 = splat.i8x16 v2
+    v4 = iadd v0, v3
+    return v4
+}
+; run: %splat_i8x16_3([-128 -101 -75 -59 -22 -12 -7 -1 0 3 17 34 68 92 111 127]) == [-12 15 41 57 94 104 109 115 116 119 -123 -106 -72 -48 -29 -13]
+
+function %splat_i16x8_2(i16x8) -> i16x8 {
+block0(v0: i16x8):
+    v1 = iconst.i16 42
+    v2 = splat.i16x8 v1
+    v3 = iadd v0, v2
+    return v3
+}
+; run: %splat_i16x8_2([-32768 -1500 -1 0 42 200 8576 32767]) == [-32726 -1458 41 42 84 242 8618 -32727]
+
+function %splat_i16x8_3(i16x8) -> i16x8 {
+block0(v0: i16x8):
+    v1 = iconst.i64 42
+    v2 = ireduce.i16 v1
+    v3 = splat.i16x8 v2
+    v4 = iadd v0, v3
+    return v4
+}
+; run: %splat_i16x8_3([-32768 -1500 -1 0 42 200 8576 32767]) == [-32726 -1458 41 42 84 242 8618 -32727]
+
+function %splat_i32x4_2(i32x4) -> i32x4 {
+block0(v0: i32x4):
+    v1 = iconst.i32 1024
+    v2 = splat.i32x4 v1
+    v3 = iadd v0, v2
+    return v3
+}
+; run: %splat_i32x4_2([-2147483648 -1 0 2147483647]) == [-2147482624 1023 1024 -2147482625]
+
+function %splat_i32x4_3(i32x4) -> i32x4 {
+block0(v0: i32x4):
+    v1 = iconst.i64 1024
+    v2 = ireduce.i32 v1
+    v3 = splat.i32x4 v2
+    v4 = iadd v0, v3
+    return v4
+}
+; run: %splat_i32x4_3([-2147483648 -1 0 2147483647]) == [-2147482624 1023 1024 -2147482625]
+
+function %splat_i64x2_2(i64x2) -> i64x2 {
+block0(v0: i64x2):
+    v1 = iconst.i64 -1
+    v2 = splat.i64x2 v1
+    v3 = iadd v0, v2
+    return v3
+}
+; run: %splat_i64x2_2([-1 0]) == [-2 -1]
+
+function %splat_f32x4_2(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = f32const 0x1.5
+    v2 = splat.f32x4 v1
+    v3 = fadd v0, v2
+    return v3
+}
+; run: %splat_f32x4_2([0x0.0 NaN 0x1.0 0x2.0]) == [0x1.5 NaN 0x2.5 0x3.5]
+
+function %splat_f64x2_2(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = f64const 0x7.5
+    v2 = splat.f64x2 v1
+    v3 = fadd v0, v2
+    return v3
+}
+; run: %splat_f64x2_2([0x0.0 0x1.0]) == [0x7.5 0x8.5]
+
+function %load_splat_i8x16(i8) -> i8x16 {
+ss0 = explicit_slot 8
+block0(v0: i8):
+    stack_store.i8 v0, ss0
+    v1 = stack_load.i8 ss0
+    v2 = splat.i8x16 v1
+    return v2
+}
+; run: %load_splat_i8x16(-1) == [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
+; run: %load_splat_i8x16(0) == [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
+; run: %load_splat_i8x16(1) == [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
+
+function %load_splat_i16x8(i16) -> i16x8 {
+ss0 = explicit_slot 8
+block0(v0: i16):
+    stack_store.i16 v0, ss0
+    v1 = stack_load.i16 ss0
+    v2 = splat.i16x8 v1
+    return v2
+}
+; run: %load_splat_i16x8(-1) == [-1 -1 -1 -1 -1 -1 -1 -1]
+; run: %load_splat_i16x8(0) == [0 0 0 0 0 0 0 0]
+; run: %load_splat_i16x8(512) == [512 512 512 512 512 512 512 512]
+
+function %load_splat_i32x4(i32) -> i32x4 {
+ss0 = explicit_slot 8
+block0(v0: i32):
+    stack_store.i32 v0, ss0
+    v1 = stack_load.i32 ss0
+    v2 = splat.i32x4 v1
+    return v2
+}
+; run: %load_splat_i32x4(-1) == [-1 -1 -1 -1]
+; run: %load_splat_i32x4(0) == [0 0 0 0]
+; run: %load_splat_i32x4(2000000) == [2000000 2000000 2000000 2000000]
+
+function %load_splat_i64x2(i64) -> i64x2 {
+ss0 = explicit_slot 8
+block0(v0: i64):
+    stack_store.i64 v0, ss0
+    v1 = stack_load.i64 ss0
+    v2 = splat.i64x2 v1
+    return v2
+}
+; run: %load_splat_i64x2(-1) == [-1 -1]
+; run: %load_splat_i64x2(0) == [0 0]
+; run: %load_splat_i64x2(5000000000) == [5000000000 5000000000]
+
+function %load_splat_f32x4(f32) -> f32x4 {
+ss0 = explicit_slot 8
+block0(v0: f32):
+    stack_store.f32 v0, ss0
+    v1 = stack_load.f32 ss0
+    v2 = splat.f32x4 v1
+    return v2
+}
+; run: %load_splat_f32x4(-0x0.0) == [-0x0.0 -0x0.0 -0x0.0 -0x0.0]
+; run: %load_splat_f32x4(0x1.0) == [0x1.0 0x1.0 0x1.0 0x1.0]
+; run: %load_splat_f32x4(NaN) == [NaN NaN NaN NaN]
+
+function %load_splat_f64x2(f64) -> f64x2 {
+ss0 = explicit_slot 8
+block0(v0: f64):
+    stack_store.f64 v0, ss0
+    v1 = stack_load.f64 ss0
+    v2 = splat.f64x2 v1
+    return v2
+}
+; run: %load_splat_f64x2(0x0.0) == [0x0.0 0x0.0]
+; run: %load_splat_f64x2(0x2.0) == [0x2.0 0x2.0]
+; run: %load_splat_f64x2(NaN) == [NaN NaN]