From 4a84f3f0732d41e8685d1005fc63b31acad44e1f Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Wed, 19 Aug 2020 18:07:12 +0200
Subject: [PATCH 1/2] Lower fcvt_from_{u,s}int for 8 and 16 bit ints

---
Review notes. This free-text area sits between the three-dash marker and the
first "diff --git" line; it is commentary and not part of the commit message.

  * Scope: every hunk widens an existing `match` table so that 8- and 16-bit
    integer types are accepted next to the 32-bit case.
      - Hunk 1 (block with `signed = op == Opcode::FcvtToSint`): out_bits 8 and
        16 select the same FpuToIntOp variants as out_bits 32
        (F32ToU32 / F32ToI32 / F64ToU32 / F64ToI32).
      - Hunks 2-5 (`low_bound, low_cond, high_bound` tables for f32 and f64
        inputs): new rows for out_bits 8 and 16. Signed rows are
        (MIN - 1, FloatCC::GreaterThan, MAX + 1); unsigned rows are
        (-1, FloatCC::GreaterThan, MAX + 1). How the three values are consumed
        is outside these hunks.
      - Hunk 6 (block with `signed = op == Opcode::FcvtFromSint`): in_bits 8 and
        16 select the same IntToFpuOp variants as in_bits 32.
      - Hunk 7 (`narrow_mode` table): in_bits 8 and 16 select
        NarrowValueMode::ZeroExtend32 / SignExtend32, like in_bits 32.
  * The existing i32 row for f32 carries the comment that I32_MIN - 1 is not
    precisely representable as f32 and therefore uses GreaterThanOrEqual. The
    new i8/i16 rows use `MIN - 1` with GreaterThan; -129 and -32769 are below
    2^24 in magnitude, so they are exact in f32.
  * The subject line names only fcvt_from_{u,s}int, but hunk 1 changes the block
    keyed on Opcode::FcvtToSint as well; the to-int direction is also affected.
  * NOTE(review): line breaks and leading whitespace in this copy were rebuilt
    to agree with the hunk line counts; indentation widths are an assumption.
    The text after each "@@ ... @@" marker and the author line are kept as
    found and look truncated. Verify against the original commit before applying.

 .../codegen/src/isa/aarch64/lower_inst.rs | 44 ++++++++++++++-----
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 5fe62da697..819071cc96 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -2157,12 +2157,12 @@ pub(crate) fn lower_insn_to_regs>(
             let out_bits = ty_bits(ctx.output_ty(insn, 0));
             let signed = op == Opcode::FcvtToSint;
             let op = match (signed, in_bits, out_bits) {
-                (false, 32, 32) => FpuToIntOp::F32ToU32,
-                (true, 32, 32) => FpuToIntOp::F32ToI32,
+                (false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32,
+                (true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32,
                 (false, 32, 64) => FpuToIntOp::F32ToU64,
                 (true, 32, 64) => FpuToIntOp::F32ToI64,
-                (false, 64, 32) => FpuToIntOp::F64ToU32,
-                (true, 64, 32) => FpuToIntOp::F64ToI32,
+                (false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32,
+                (true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32,
                 (false, 64, 64) => FpuToIntOp::F64ToU64,
                 (true, 64, 64) => FpuToIntOp::F64ToI64,
                 _ => panic!("Unknown input/output-bits combination"),
@@ -2199,6 +2199,16 @@ pub(crate) fn lower_insn_to_regs>(
             if in_bits == 32 {
                 // From float32.
                 let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
+                    (true, 8) => (
+                        i8::min_value() as f32 - 1.,
+                        FloatCC::GreaterThan,
+                        i8::max_value() as f32 + 1.,
+                    ),
+                    (true, 16) => (
+                        i16::min_value() as f32 - 1.,
+                        FloatCC::GreaterThan,
+                        i16::max_value() as f32 + 1.,
+                    ),
                     (true, 32) => (
                         i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
                         FloatCC::GreaterThanOrEqual,
@@ -2209,6 +2219,8 @@ pub(crate) fn lower_insn_to_regs>(
                         FloatCC::GreaterThanOrEqual,
                         i64::max_value() as f32 + 1.,
                     ),
+                    (false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.),
+                    (false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.),
                     (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
                     (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
                     _ => panic!("Unknown input/output-bits combination"),
@@ -2240,6 +2252,16 @@ pub(crate) fn lower_insn_to_regs>(
             } else {
                 // From float64.
                 let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
+                    (true, 8) => (
+                        i8::min_value() as f64 - 1.,
+                        FloatCC::GreaterThan,
+                        i8::max_value() as f64 + 1.,
+                    ),
+                    (true, 16) => (
+                        i16::min_value() as f64 - 1.,
+                        FloatCC::GreaterThan,
+                        i16::max_value() as f64 + 1.,
+                    ),
                     (true, 32) => (
                         i32::min_value() as f64 - 1.,
                         FloatCC::GreaterThan,
@@ -2250,6 +2272,8 @@ pub(crate) fn lower_insn_to_regs>(
                         FloatCC::GreaterThanOrEqual,
                         i64::max_value() as f64 + 1.,
                     ),
+                    (false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.),
+                    (false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.),
                     (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
                     (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
                     _ => panic!("Unknown input/output-bits combination"),
@@ -2289,10 +2313,10 @@ pub(crate) fn lower_insn_to_regs>(
             let out_bits = ty_bits(ctx.output_ty(insn, 0));
             let signed = op == Opcode::FcvtFromSint;
             let op = match (signed, in_bits, out_bits) {
-                (false, 32, 32) => IntToFpuOp::U32ToF32,
-                (true, 32, 32) => IntToFpuOp::I32ToF32,
-                (false, 32, 64) => IntToFpuOp::U32ToF64,
-                (true, 32, 64) => IntToFpuOp::I32ToF64,
+                (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
+                (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
+                (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
+                (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
                 (false, 64, 32) => IntToFpuOp::U64ToF32,
                 (true, 64, 32) => IntToFpuOp::I64ToF32,
                 (false, 64, 64) => IntToFpuOp::U64ToF64,
@@ -2300,8 +2324,8 @@ pub(crate) fn lower_insn_to_regs>(
                 _ => panic!("Unknown input/output-bits combination"),
             };
             let narrow_mode = match (signed, in_bits) {
-                (false, 32) => NarrowValueMode::ZeroExtend32,
-                (true, 32) => NarrowValueMode::SignExtend32,
+                (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
+                (true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
                 (false, 64) => NarrowValueMode::ZeroExtend64,
                 (true, 64) => NarrowValueMode::SignExtend64,
                 _ => panic!("Unknown input size"),

From 3a16416132ac3ed215179841de664938636d9177 Mon Sep 17 00:00:00 2001
From: bjorn3
Date: Wed, 19 Aug 2020 19:17:27 +0200
Subject: [PATCH 2/2] Add tests

---
Review notes. This free-text area sits between the three-dash marker and the
first "diff --git" line; it is commentary and not part of the commit message.

  * Adds one new filetest (`test compile`, `target aarch64`) with eight
    functions, all named u0:0.
  * Covered: fcvt_from_uint for i8/i16 inputs to f32/f64, and fcvt_to_uint for
    f32/f64 inputs to i8/i16.
      - from_uint expects a zero-extension (uxtb for i8, uxth for i16) followed
        by ucvtf into s0 or d0.
      - to_uint expects a self-compare (fcmp x, x; b.vc), a lower-bound check
        against the constant -1 (b.gt), an upper-bound check against 256 (i8)
        or 65536 (i16) (b.mi), then fcvtzu into w0.
  * Not covered: the signed opcodes whose tables patch 1/2 also widened
    (the FcvtFromSint / FcvtToSint blocks). TODO(review): add i8/i16 cases for
    the signed conversions so the MIN - 1 / MAX + 1 rows and the SignExtend32
    narrow mode are exercised.
  * NOTE(review): indentation inside the .clif body below was rebuilt for this
    copy and is an assumption; the added-line count (134) matches the hunk header.

 .../filetests/vcode/aarch64/fcvt-small.clif | 134 ++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 cranelift/filetests/filetests/vcode/aarch64/fcvt-small.clif

diff --git a/cranelift/filetests/filetests/vcode/aarch64/fcvt-small.clif b/cranelift/filetests/filetests/vcode/aarch64/fcvt-small.clif
new file mode 100644
index 0000000000..518c80e17a
--- /dev/null
+++ b/cranelift/filetests/filetests/vcode/aarch64/fcvt-small.clif
@@ -0,0 +1,134 @@
+test compile
+target aarch64
+
+function u0:0(i8) -> f32 {
+block0(v0: i8):
+    ; check: stp fp, lr, [sp, #-16]!
+    ; check: mov fp, sp
+    v1 = fcvt_from_uint.f32 v0
+    ; check: uxtb w0, w0
+    ; check: ucvtf s0, w0
+    return v1
+    ; check: mov sp, fp
+    ; check: ldp fp, lr, [sp], #16
+    ; check: ret
+}
+
+function u0:0(i8) -> f64 {
+block0(v0: i8):
+    ; check: stp fp, lr, [sp, #-16]!
+    ; check: mov fp, sp
+    v1 = fcvt_from_uint.f64 v0
+    ; check: uxtb w0, w0
+    ; check: ucvtf d0, w0
+    return v1
+    ; check: mov sp, fp
+    ; check: ldp fp, lr, [sp], #16
+    ; check: ret
+}
+
+function u0:0(i16) -> f32 {
+block0(v0: i16):
+    ; check: stp fp, lr, [sp, #-16]!
+    ; check: mov fp, sp
+    v1 = fcvt_from_uint.f32 v0
+    ; check: uxth w0, w0
+    ; check: ucvtf s0, w0
+    return v1
+    ; check: mov sp, fp
+    ; check: ldp fp, lr, [sp], #16
+    ; check: ret
+}
+
+function u0:0(i16) -> f64 {
+block0(v0: i16):
+    ; check: stp fp, lr, [sp, #-16]!
+    ; check: mov fp, sp
+    v1 = fcvt_from_uint.f64 v0
+    ; check: uxth w0, w0
+    ; check: ucvtf d0, w0
+    return v1
+    ; check: mov sp, fp
+    ; check: ldp fp, lr, [sp], #16
+    ; check: ret
+}
+
+function u0:0(f32) -> i8 {
+block0(v0: f32):
+    ; check: stp fp, lr, [sp, #-16]!
+    ; check: mov fp, sp
+    v1 = fcvt_to_uint.i8 v0
+    ; check: fcmp s0, s0
+    ; check: b.vc 8 ; udf
+    ; check: ldr s1, pc+8 ; b 8 ; data.f32 -1
+    ; check: fcmp s0, s1
+    ; check: b.gt 8 ; udf
+    ; check: ldr s1, pc+8 ; b 8 ; data.f32 256
+    ; check: fcmp s0, s1
+    ; check: b.mi 8 ; udf
+    ; check: fcvtzu w0, s0
+    return v1
+    ; check: mov sp, fp
+    ; check: ldp fp, lr, [sp], #16
+    ; check: ret
+}
+
+function u0:0(f64) -> i8 {
+block0(v0: f64):
+    ; check: stp fp, lr, [sp, #-16]!
+    ; check: mov fp, sp
+    v1 = fcvt_to_uint.i8 v0
+    ; check: fcmp d0, d0
+    ; check: b.vc 8 ; udf
+    ; check: ldr d1, pc+8 ; b 12 ; data.f64 -1
+    ; check: fcmp d0, d1
+    ; check: b.gt 8 ; udf
+    ; check: ldr d1, pc+8 ; b 12 ; data.f64 256
+    ; check: fcmp d0, d1
+    ; check: b.mi 8 ; udf
+    ; check: fcvtzu w0, d0
+    return v1
+    ; check: mov sp, fp
+    ; check: ldp fp, lr, [sp], #16
+    ; check: ret
+}
+
+function u0:0(f32) -> i16 {
+block0(v0: f32):
+    ; check: stp fp, lr, [sp, #-16]!
+    ; check: mov fp, sp
+    v1 = fcvt_to_uint.i16 v0
+    ; check: fcmp s0, s0
+    ; check: b.vc 8 ; udf
+    ; check: ldr s1, pc+8 ; b 8 ; data.f32 -1
+    ; check: fcmp s0, s1
+    ; check: b.gt 8 ; udf
+    ; check: ldr s1, pc+8 ; b 8 ; data.f32 65536
+    ; check: fcmp s0, s1
+    ; check: b.mi 8 ; udf
+    ; check: fcvtzu w0, s0
+    return v1
+    ; check: mov sp, fp
+    ; check: ldp fp, lr, [sp], #16
+    ; check: ret
+}
+
+function u0:0(f64) -> i16 {
+block0(v0: f64):
+    ; check: stp fp, lr, [sp, #-16]!
+    ; check: mov fp, sp
+    v1 = fcvt_to_uint.i16 v0
+    ; check: fcmp d0, d0
+    ; check: b.vc 8 ; udf
+    ; check: ldr d1, pc+8 ; b 12 ; data.f64 -1
+    ; check: fcmp d0, d1
+    ; check: b.gt 8 ; udf
+    ; check: ldr d1, pc+8 ; b 12 ; data.f64 65536
+    ; check: fcmp d0, d1
+    ; check: b.mi 8 ; udf
+    ; check: fcvtzu w0, d0
+    return v1
+    ; check: mov sp, fp
+    ; check: ldp fp, lr, [sp], #16
+    ; check: ret
+}