Convert scalar_to_vector to ISLE (AArch64) (#4401)
* Convert `scalar_to_vector` to ISLE (AArch64) Converted the existing implementation of `scalar_to_vector` for AArch64 to ISLE. Copyright (c) 2022 Arm Limited * Add support for floats and fix FpuExtend - Added rules to cover `f32 -> f32x4` and `f64 -> f64x2` for `scalar_to_vector` - Added tests for `scalar_to_vector` on floats. - Corrected an invalid instruction emitted by `FpuExtend` on 64-bit values. Copyright (c) 2022 Arm Limited
This commit is contained in:
@@ -1637,6 +1637,13 @@
|
|||||||
(_ Unit (emit (MInst.Extend dst rn signed from_bits to_bits))))
|
(_ Unit (emit (MInst.Extend dst rn signed from_bits to_bits))))
|
||||||
dst))
|
dst))
|
||||||
|
|
||||||
|
;; Helper for emitting `MInst.FpuExtend` instructions.
|
||||||
|
(decl fpu_extend (Reg ScalarSize) Reg)
|
||||||
|
(rule (fpu_extend src size)
|
||||||
|
(let ((dst WritableReg (temp_writable_reg $F32X4))
|
||||||
|
(_ Unit (emit (MInst.FpuExtend dst src size))))
|
||||||
|
dst))
|
||||||
|
|
||||||
;; Helper for emitting `MInst.LoadAcquire` instructions.
|
;; Helper for emitting `MInst.LoadAcquire` instructions.
|
||||||
(decl load_acquire (Type Reg) Reg)
|
(decl load_acquire (Type Reg) Reg)
|
||||||
(rule (load_acquire ty addr)
|
(rule (load_acquire ty addr)
|
||||||
|
|||||||
@@ -1688,7 +1688,7 @@ impl MachInstEmit for Inst {
|
|||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
let rn = allocs.next(rn);
|
let rn = allocs.next(rn);
|
||||||
sink.put4(enc_fpurr(
|
sink.put4(enc_fpurr(
|
||||||
0b000_11110_00_1_000000_10000 | (size.ftype() << 13),
|
0b000_11110_00_1_000000_10000 | (size.ftype() << 12),
|
||||||
rd,
|
rd,
|
||||||
rn,
|
rn,
|
||||||
));
|
));
|
||||||
|
|||||||
@@ -5528,6 +5528,16 @@ fn test_aarch64_binemit() {
|
|||||||
"fmov s31, s0",
|
"fmov s31, s0",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::FpuExtend {
|
||||||
|
rd: writable_vreg(31),
|
||||||
|
rn: vreg(0),
|
||||||
|
size: ScalarSize::Size64,
|
||||||
|
},
|
||||||
|
"1F40601E",
|
||||||
|
"fmov d31, d0",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRR {
|
Inst::FpuRR {
|
||||||
fpu_op: FPUOp1::Abs,
|
fpu_op: FPUOp1::Abs,
|
||||||
|
|||||||
@@ -121,6 +121,20 @@
|
|||||||
(rule (lower (has_type $I128 (iconcat lo hi)))
|
(rule (lower (has_type $I128 (iconcat lo hi)))
|
||||||
(output (value_regs lo hi)))
|
(output (value_regs lo hi)))
|
||||||
|
|
||||||
|
;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(rule (lower (has_type $F32X4 (scalar_to_vector x)))
|
||||||
|
(fpu_extend x (ScalarSize.Size32)))
|
||||||
|
|
||||||
|
(rule (lower (has_type $F64X2 (scalar_to_vector x)))
|
||||||
|
(fpu_extend x (ScalarSize.Size64)))
|
||||||
|
|
||||||
|
(rule (lower (scalar_to_vector x @ (value_type (ty_int_bool_64 _))))
|
||||||
|
(mov_to_fpu x (ScalarSize.Size64)))
|
||||||
|
|
||||||
|
(rule (lower (scalar_to_vector x @ (value_type (int_bool_fits_in_32 _))))
|
||||||
|
(mov_to_fpu (put_in_reg_zext32 x) (ScalarSize.Size32)))
|
||||||
|
|
||||||
;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high y))))
|
(rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high y))))
|
||||||
|
|||||||
@@ -816,25 +816,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::ScalarToVector => {
|
Opcode::ScalarToVector => implemented_in_isle(ctx),
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
let input_ty = ctx.input_ty(insn, 0);
|
|
||||||
if (input_ty == I32 && ty.unwrap() == I32X4)
|
|
||||||
|| (input_ty == I64 && ty.unwrap() == I64X2)
|
|
||||||
{
|
|
||||||
ctx.emit(Inst::MovToFpu {
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
size: ScalarSize::from_ty(input_ty),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
return Err(CodegenError::Unsupported(format!(
|
|
||||||
"ScalarToVector: unsupported types {:?} -> {:?}",
|
|
||||||
input_ty, ty
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::VallTrue if ctx.input_ty(insn, 0).lane_bits() == 64 => {
|
Opcode::VallTrue if ctx.input_ty(insn, 0).lane_bits() == 64 => {
|
||||||
let input_ty = ctx.input_ty(insn, 0);
|
let input_ty = ctx.input_ty(insn, 0);
|
||||||
|
|||||||
@@ -299,6 +299,14 @@ macro_rules! isle_prelude_methods {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn int_bool_fits_in_32(&mut self, ty: Type) -> Option<Type> {
|
||||||
|
match ty {
|
||||||
|
I8 | I16 | I32 | B8 | B16 | B32 => Some(ty),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn ty_int_bool_64(&mut self, ty: Type) -> Option<Type> {
|
fn ty_int_bool_64(&mut self, ty: Type) -> Option<Type> {
|
||||||
match ty {
|
match ty {
|
||||||
|
|||||||
@@ -313,6 +313,10 @@
|
|||||||
(decl ty_8_or_16 (Type) Type)
|
(decl ty_8_or_16 (Type) Type)
|
||||||
(extern extractor ty_8_or_16 ty_8_or_16)
|
(extern extractor ty_8_or_16 ty_8_or_16)
|
||||||
|
|
||||||
|
;; An extractor that matches int and bool types that fit in 32 bits.
|
||||||
|
(decl int_bool_fits_in_32 (Type) Type)
|
||||||
|
(extern extractor int_bool_fits_in_32 int_bool_fits_in_32)
|
||||||
|
|
||||||
;; An extractor that matches I64 or B64.
|
;; An extractor that matches I64 or B64.
|
||||||
(decl ty_int_bool_64 (Type) Type)
|
(decl ty_int_bool_64 (Type) Type)
|
||||||
(extern extractor ty_int_bool_64 ty_int_bool_64)
|
(extern extractor ty_int_bool_64 ty_int_bool_64)
|
||||||
|
|||||||
@@ -10,9 +10,9 @@ block0:
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; movz x2, #1
|
; movz x1, #1
|
||||||
; movk x2, #1, LSL #48
|
; movk x1, #1, LSL #48
|
||||||
; fmov d0, x2
|
; fmov d0, x1
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
function %f2() -> i32x4 {
|
function %f2() -> i32x4 {
|
||||||
@@ -23,7 +23,31 @@ block0:
|
|||||||
}
|
}
|
||||||
|
|
||||||
; block0:
|
; block0:
|
||||||
; movz x2, #42679
|
; movz x1, #42679
|
||||||
; fmov s0, w2
|
; fmov s0, w1
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f3() -> f32x4 {
|
||||||
|
block0:
|
||||||
|
v0 = f32const 0x1.0
|
||||||
|
v1 = scalar_to_vector.f32x4 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; fmov s1, #1
|
||||||
|
; fmov s0, s1
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %f4() -> f64x2 {
|
||||||
|
block0:
|
||||||
|
v0 = f64const 0x1.0
|
||||||
|
v1 = scalar_to_vector.f64x2 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; fmov d1, #1
|
||||||
|
; fmov d0, d1
|
||||||
; ret
|
; ret
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,19 @@
|
|||||||
|
test run
|
||||||
|
target aarch64
|
||||||
|
; i8 and i16 are invalid source sizes for x86_64
|
||||||
|
|
||||||
|
function %scalartovector_i8(i8) -> i8x16 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = scalar_to_vector.i8x16 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %scalartovector_i8(1) == [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
; run: %scalartovector_i8(255) == [255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
|
||||||
|
function %scalartovector_i16(i16) -> i16x8 {
|
||||||
|
block0(v0: i16):
|
||||||
|
v1 = scalar_to_vector.i16x8 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %scalartovector_i16(1) == [1 0 0 0 0 0 0 0]
|
||||||
|
; run: %scalartovector_i16(65535) == [65535 0 0 0 0 0 0 0]
|
||||||
@@ -0,0 +1,42 @@
|
|||||||
|
test run
|
||||||
|
target aarch64
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 has_sse3 has_ssse3 has_sse41
|
||||||
|
|
||||||
|
function %scalartovector_i32(i32) -> i32x4 {
|
||||||
|
block0(v0: i32):
|
||||||
|
v1 = scalar_to_vector.i32x4 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %scalartovector_i32(1) == [1 0 0 0]
|
||||||
|
; run: %scalartovector_i32(4294967295) == [4294967295 0 0 0]
|
||||||
|
|
||||||
|
function %scalartovector_i64(i64) -> i64x2 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = scalar_to_vector.i64x2 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %scalartovector_i64(1) == [1 0]
|
||||||
|
; run: %scalartovector_i64(18446744073709551615) == [18446744073709551615 0]
|
||||||
|
|
||||||
|
function %scalartovector_f32(f32) -> f32x4 {
|
||||||
|
block0(v0: f32):
|
||||||
|
v1 = scalar_to_vector.f32x4 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %scalartovector_f32(0x1.0) == [0x1.0 0x0.0 0x0.0 0x0.0]
|
||||||
|
; run: %scalartovector_f32(0x0.1) == [0x0.1 0x0.0 0x0.0 0x0.0]
|
||||||
|
; run: %scalartovector_f32(NaN) == [NaN 0x0.0 0x0.0 0x0.0]
|
||||||
|
; run: %scalartovector_f32(-0x0.0) == [-0x0.0 0x0.0 0x0.0 0x0.0]
|
||||||
|
; run: %scalartovector_f32(0x0.0) == [0x0.0 0x0.0 0x0.0 0x0.0]
|
||||||
|
|
||||||
|
function %scalartovector_f64(f64) -> f64x2 {
|
||||||
|
block0(v0: f64):
|
||||||
|
v1 = scalar_to_vector.f64x2 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %scalartovector_f64(0x1.0) == [0x1.0 0x0.0]
|
||||||
|
; run: %scalartovector_f64(0x0.1) == [0x0.1 0x0.0]
|
||||||
|
; run: %scalartovector_f64(NaN) == [NaN 0x0.0]
|
||||||
|
; run: %scalartovector_f64(-0x0.0) == [-0x0.0 0x0.0]
|
||||||
|
; run: %scalartovector_f64(0x0.0) == [0x0.0 0x0.0]
|
||||||
Reference in New Issue
Block a user