From db7f9ccd2b1d606e608e2cbc989e5856736f1c9c Mon Sep 17 00:00:00 2001 From: Damian Heaton <87125748+dheaton-arm@users.noreply.github.com> Date: Mon, 18 Jul 2022 19:11:54 +0100 Subject: [PATCH] Convert `scalar_to_vector` to ISLE (AArch64) (#4401) * Convert `scalar_to_vector` to ISLE (AArch64) Converted the existing implementation of `scalar_to_vector` for AArch64 to ISLE. Copyright (c) 2022 Arm Limited * Add support for floats and fix FpuExtend - Added rules to cover `f32 -> f32x4` and `f64 -> f64x2` for `scalar_to_vector` - Added tests for `scalar_to_vector` on floats. - Corrected an invalid instruction emitted by `FpuExtend` on 64-bit values. Copyright (c) 2022 Arm Limited --- cranelift/codegen/src/isa/aarch64/inst.isle | 7 ++++ .../codegen/src/isa/aarch64/inst/emit.rs | 2 +- .../src/isa/aarch64/inst/emit_tests.rs | 10 +++++ cranelift/codegen/src/isa/aarch64/lower.isle | 14 +++++++ .../codegen/src/isa/aarch64/lower_inst.rs | 20 +-------- cranelift/codegen/src/machinst/isle.rs | 8 ++++ cranelift/codegen/src/prelude.isle | 4 ++ .../filetests/isa/aarch64/simd_load_zero.clif | 34 ++++++++++++--- .../runtests/simd-scalartovector-aarch64.clif | 19 +++++++++ .../runtests/simd-scalartovector.clif | 42 +++++++++++++++++++ 10 files changed, 135 insertions(+), 25 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif create mode 100644 cranelift/filetests/filetests/runtests/simd-scalartovector.clif diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 6397ff3c1c..9e211a4c7b 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1637,6 +1637,13 @@ (_ Unit (emit (MInst.Extend dst rn signed from_bits to_bits)))) dst)) +;; Helper for emitting `MInst.FpuExtend` instructions. 
+(decl fpu_extend (Reg ScalarSize) Reg) +(rule (fpu_extend src size) + (let ((dst WritableReg (temp_writable_reg $F32X4)) + (_ Unit (emit (MInst.FpuExtend dst src size)))) + dst)) + ;; Helper for emitting `MInst.LoadAcquire` instructions. (decl load_acquire (Type Reg) Reg) (rule (load_acquire ty addr) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 7ff0a2f2a2..bfbd121b2d 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1688,7 +1688,7 @@ impl MachInstEmit for Inst { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); sink.put4(enc_fpurr( - 0b000_11110_00_1_000000_10000 | (size.ftype() << 13), + 0b000_11110_00_1_000000_10000 | (size.ftype() << 12), rd, rn, )); diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index c1a124ec80..66d1d8a776 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -5528,6 +5528,16 @@ fn test_aarch64_binemit() { "fmov s31, s0", )); + insns.push(( + Inst::FpuExtend { + rd: writable_vreg(31), + rn: vreg(0), + size: ScalarSize::Size64, + }, + "1F40601E", + "fmov d31, d0", + )); + insns.push(( Inst::FpuRR { fpu_op: FPUOp1::Abs, diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 6d5cc87f92..f215654b1d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -121,6 +121,20 @@ (rule (lower (has_type $I128 (iconcat lo hi))) (output (value_regs lo hi))) +;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $F32X4 (scalar_to_vector x))) + (fpu_extend x (ScalarSize.Size32))) + +(rule (lower (has_type $F64X2 (scalar_to_vector x))) + (fpu_extend x (ScalarSize.Size64))) + +(rule (lower (scalar_to_vector 
x @ (value_type (ty_int_bool_64 _)))) + (mov_to_fpu x (ScalarSize.Size64))) + +(rule (lower (scalar_to_vector x @ (value_type (int_bool_fits_in_32 _)))) + (mov_to_fpu (put_in_reg_zext32 x) (ScalarSize.Size32))) + ;;;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $I16X8 (iadd_pairwise (swiden_low x) (swiden_high y)))) diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index b407ef3dd9..50f01e9f23 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -816,25 +816,7 @@ pub(crate) fn lower_insn_to_regs>( } } - Opcode::ScalarToVector => { - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let input_ty = ctx.input_ty(insn, 0); - if (input_ty == I32 && ty.unwrap() == I32X4) - || (input_ty == I64 && ty.unwrap() == I64X2) - { - ctx.emit(Inst::MovToFpu { - rd, - rn, - size: ScalarSize::from_ty(input_ty), - }); - } else { - return Err(CodegenError::Unsupported(format!( - "ScalarToVector: unsupported types {:?} -> {:?}", - input_ty, ty - ))); - } - } + Opcode::ScalarToVector => implemented_in_isle(ctx), Opcode::VallTrue if ctx.input_ty(insn, 0).lane_bits() == 64 => { let input_ty = ctx.input_ty(insn, 0); diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 28005863df..c941ead143 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -299,6 +299,14 @@ macro_rules! 
isle_prelude_methods { } } + #[inline] + fn int_bool_fits_in_32(&mut self, ty: Type) -> Option<Type> { + match ty { + I8 | I16 | I32 | B8 | B16 | B32 => Some(ty), + _ => None, + } + } + #[inline] fn ty_int_bool_64(&mut self, ty: Type) -> Option<Type> { match ty { diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index ccaef32341..62933cd7a1 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -313,6 +313,10 @@ (decl ty_8_or_16 (Type) Type) (extern extractor ty_8_or_16 ty_8_or_16) +;; An extractor that matches int and bool types that fit in 32 bits. +(decl int_bool_fits_in_32 (Type) Type) +(extern extractor int_bool_fits_in_32 int_bool_fits_in_32) + ;; An extractor that matches I64 or B64. (decl ty_int_bool_64 (Type) Type) (extern extractor ty_int_bool_64 ty_int_bool_64) diff --git a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif index 894ed03775..70ceecd6db 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif @@ -10,9 +10,9 @@ block0: } ; block0: -; movz x2, #1 -; movk x2, #1, LSL #48 -; fmov d0, x2 +; movz x1, #1 +; movk x1, #1, LSL #48 +; fmov d0, x1 ; ret function %f2() -> i32x4 { @@ -23,7 +23,31 @@ block0: } ; block0: -; movz x2, #42679 -; fmov s0, w2 +; movz x1, #42679 +; fmov s0, w1 +; ret + +function %f3() -> f32x4 { +block0: + v0 = f32const 0x1.0 + v1 = scalar_to_vector.f32x4 v0 + return v1 +} + +; block0: +; fmov s1, #1 +; fmov s0, s1 +; ret + +function %f4() -> f64x2 { +block0: + v0 = f64const 0x1.0 + v1 = scalar_to_vector.f64x2 v0 + return v1 +} + +; block0: +; fmov d1, #1 +; fmov d0, d1 ; ret diff --git a/cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif new file mode 100644 index 0000000000..5a458f7de6 --- /dev/null +++ 
b/cranelift/filetests/filetests/runtests/simd-scalartovector-aarch64.clif @@ -0,0 +1,19 @@ +test run +target aarch64 +; i8 and i16 are invalid source sizes for x86_64 + +function %scalartovector_i8(i8) -> i8x16 { +block0(v0: i8): + v1 = scalar_to_vector.i8x16 v0 + return v1 +} +; run: %scalartovector_i8(1) == [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] +; run: %scalartovector_i8(255) == [255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] + +function %scalartovector_i16(i16) -> i16x8 { +block0(v0: i16): + v1 = scalar_to_vector.i16x8 v0 + return v1 +} +; run: %scalartovector_i16(1) == [1 0 0 0 0 0 0 0] +; run: %scalartovector_i16(65535) == [65535 0 0 0 0 0 0 0] \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/simd-scalartovector.clif b/cranelift/filetests/filetests/runtests/simd-scalartovector.clif new file mode 100644 index 0000000000..37d726f8bf --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-scalartovector.clif @@ -0,0 +1,42 @@ +test run +target aarch64 +set enable_simd +target x86_64 has_sse3 has_ssse3 has_sse41 + +function %scalartovector_i32(i32) -> i32x4 { +block0(v0: i32): + v1 = scalar_to_vector.i32x4 v0 + return v1 +} +; run: %scalartovector_i32(1) == [1 0 0 0] +; run: %scalartovector_i32(4294967295) == [4294967295 0 0 0] + +function %scalartovector_i64(i64) -> i64x2 { +block0(v0: i64): + v1 = scalar_to_vector.i64x2 v0 + return v1 +} +; run: %scalartovector_i64(1) == [1 0] +; run: %scalartovector_i64(18446744073709551615) == [18446744073709551615 0] + +function %scalartovector_f32(f32) -> f32x4 { +block0(v0: f32): + v1 = scalar_to_vector.f32x4 v0 + return v1 +} +; run: %scalartovector_f32(0x1.0) == [0x1.0 0x0.0 0x0.0 0x0.0] +; run: %scalartovector_f32(0x0.1) == [0x0.1 0x0.0 0x0.0 0x0.0] +; run: %scalartovector_f32(NaN) == [NaN 0x0.0 0x0.0 0x0.0] +; run: %scalartovector_f32(-0x0.0) == [-0x0.0 0x0.0 0x0.0 0x0.0] +; run: %scalartovector_f32(0x0.0) == [0x0.0 0x0.0 0x0.0 0x0.0] + +function %scalartovector_f64(f64) -> f64x2 { +block0(v0: 
f64): + v1 = scalar_to_vector.f64x2 v0 + return v1 +} +; run: %scalartovector_f64(0x1.0) == [0x1.0 0x0.0] +; run: %scalartovector_f64(0x0.1) == [0x0.1 0x0.0] +; run: %scalartovector_f64(NaN) == [NaN 0x0.0] +; run: %scalartovector_f64(-0x0.0) == [-0x0.0 0x0.0] +; run: %scalartovector_f64(0x0.0) == [0x0.0 0x0.0]