aarch64: implement correct float-to-int conversion semantics

These are inherited from wasm semantics.
2020-04-22 19:10:22 +02:00
parent cb896e0be3
commit de92b7e014
3 changed files with 205 additions and 1 deletions
--- a/build.rs
+++ b/build.rs
@@ -242,7 +242,6 @@ fn should_panic(testsuite: &str, testname: &str) -> bool {
        | ("simd", _)
        | ("multi_value", "call")
        | ("spec_testsuite", "call")
-        | ("spec_testsuite", "conversions")
        | ("spec_testsuite", "i32")
        | ("spec_testsuite", "i64")
        | ("spec_testsuite", "int_exprs")
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -2151,8 +2151,125 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
                (true, 64, 64) => FpuToIntOp::F64ToI64,
                _ => panic!("Unknown input/output-bits combination"),
            };
+
            let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
            let rd = output_to_reg(ctx, outputs[0]);
+
+            // First, check the input: it's important to carry out the NaN check before the
+            // in-bounds check, per wasm semantics.
+
+            // Check that the input is not a NaN.
+            if in_bits == 32 {
+                ctx.emit(Inst::FpuCmp32 { rn, rm: rn });
+            } else {
+                ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
+            }
+            ctx.emit(Inst::CondBrLowered {
+                target: BranchTarget::ResolvedOffset(8),
+                kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
+            });
+            let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
+            ctx.emit(Inst::Udf { trap_info });
+
+            let tmp = ctx.tmp(RegClass::V128, I128);
+
+            // Check that the input is in range, with "truncate towards zero" semantics. This means
+            // we allow values that are slightly out of range:
+            // - for signed conversions, we allow values strictly greater than INT_MIN-1 (when this
+            // can be represented), and strictly less than INT_MAX+1 (when this can be
+            // represented).
+            // - for unsigned conversions, we allow values strictly greater than -1, and strictly
+            // less than UINT_MAX+1 (when this can be represented).
+
+            if in_bits == 32 {
+                // From float32.
+                let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
+                    (true, 32) => (
+                        i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
+                        FloatCC::GreaterThanOrEqual,
+                        i32::max_value() as f32 + 1.,
+                    ),
+                    (true, 64) => (
+                        i64::min_value() as f32, // I64_MIN - 1 isn't precisely representable as a f32.
+                        FloatCC::GreaterThanOrEqual,
+                        i64::max_value() as f32 + 1.,
+                    ),
+                    (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
+                    (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
+                    _ => panic!("Unknown input/output-bits combination"),
+                };
+
+                // Trap unless input > low_bound or input >= low_bound, as selected by low_cond.
+                lower_constant_f32(ctx, tmp, low_bound);
+                ctx.emit(Inst::FpuCmp32 {
+                    rn,
+                    rm: tmp.to_reg(),
+                });
+                ctx.emit(Inst::CondBrLowered {
+                    target: BranchTarget::ResolvedOffset(8),
+                    kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
+                });
+                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
+                ctx.emit(Inst::Udf { trap_info });
+
+                // Trap unless input < high_bound (strict comparison).
+                lower_constant_f32(ctx, tmp, high_bound);
+                ctx.emit(Inst::FpuCmp32 {
+                    rn,
+                    rm: tmp.to_reg(),
+                });
+                ctx.emit(Inst::CondBrLowered {
+                    target: BranchTarget::ResolvedOffset(8),
+                    kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
+                });
+                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
+                ctx.emit(Inst::Udf { trap_info });
+            } else {
+                // From float64.
+                let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
+                    (true, 32) => (
+                        i32::min_value() as f64 - 1.,
+                        FloatCC::GreaterThan,
+                        i32::max_value() as f64 + 1.,
+                    ),
+                    (true, 64) => (
+                        i64::min_value() as f64, // I64_MIN - 1 is not precisely representable as a f64.
+                        FloatCC::GreaterThanOrEqual,
+                        i64::max_value() as f64 + 1.,
+                    ),
+                    (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
+                    (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
+                    _ => panic!("Unknown input/output-bits combination"),
+                };
+
+                // Trap unless input > low_bound or input >= low_bound, as selected by low_cond.
+                lower_constant_f64(ctx, tmp, low_bound);
+                ctx.emit(Inst::FpuCmp64 {
+                    rn,
+                    rm: tmp.to_reg(),
+                });
+                ctx.emit(Inst::CondBrLowered {
+                    target: BranchTarget::ResolvedOffset(8),
+                    kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
+                });
+                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
+                ctx.emit(Inst::Udf { trap_info });
+
+                // Trap unless input < high_bound (strict comparison).
+                lower_constant_f64(ctx, tmp, high_bound);
+                ctx.emit(Inst::FpuCmp64 {
+                    rn,
+                    rm: tmp.to_reg(),
+                });
+                ctx.emit(Inst::CondBrLowered {
+                    target: BranchTarget::ResolvedOffset(8),
+                    kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
+                });
+                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
+                ctx.emit(Inst::Udf { trap_info });
+            };
+
+            // Do the conversion.
            ctx.emit(Inst::FpuToInt { op, rd, rn });
        }

--- a/cranelift/filetests/filetests/vcode/aarch64/floating-point.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/floating-point.clif
@@ -433,6 +433,17 @@ block0(v0: f32):

 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
+; nextln:  fcmp s0, s0
+; nextln:  b.vc 8
+; nextln:  udf
+; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 -1
+; nextln:  fcmp s0, s1
+; nextln:  b.gt 8
+; nextln:  udf
+; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 4294967300
+; nextln:  fcmp s0, s1
+; nextln:  b.mi 8
+; nextln:  udf
 ; nextln:  fcvtzu w0, s0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -446,6 +457,17 @@ block0(v0: f32):

 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
+; nextln:  fcmp s0, s0
+; nextln:  b.vc 8
+; nextln:  udf
+; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 -2147483600
+; nextln:  fcmp s0, s1
+; nextln:  b.ge 8
+; nextln:  udf
+; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 2147483600
+; nextln:  fcmp s0, s1
+; nextln:  b.mi 8
+; nextln:  udf
 ; nextln:  fcvtzs w0, s0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -459,6 +481,17 @@ block0(v0: f32):

 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
+; nextln:  fcmp s0, s0
+; nextln:  b.vc 8
+; nextln:  udf
+; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 -1
+; nextln:  fcmp s0, s1
+; nextln:  b.gt 8
+; nextln:  udf
+; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 18446744000000000000
+; nextln:  fcmp s0, s1
+; nextln:  b.mi 8
+; nextln:  udf
 ; nextln:  fcvtzu x0, s0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -472,6 +505,17 @@ block0(v0: f32):

 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
+; nextln:  fcmp s0, s0
+; nextln:  b.vc 8
+; nextln:  udf
+; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 -9223372000000000000
+; nextln:  fcmp s0, s1
+; nextln:  b.ge 8
+; nextln:  udf
+; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 9223372000000000000
+; nextln:  fcmp s0, s1
+; nextln:  b.mi 8
+; nextln:  udf
 ; nextln:  fcvtzs x0, s0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -485,6 +529,17 @@ block0(v0: f64):

 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
+; nextln:  fcmp d0, d0
+; nextln:  b.vc 8
+; nextln:  udf
+; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 -1
+; nextln:  fcmp d0, d1
+; nextln:  b.gt 8
+; nextln:  udf
+; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 4294967296
+; nextln:  fcmp d0, d1
+; nextln:  b.mi 8
+; nextln:  udf
 ; nextln:  fcvtzu w0, d0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -498,6 +553,17 @@ block0(v0: f64):

 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
+; nextln:  fcmp d0, d0
+; nextln:  b.vc 8
+; nextln:  udf
+; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 -2147483649
+; nextln:  fcmp d0, d1
+; nextln:  b.gt 8
+; nextln:  udf
+; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 2147483648
+; nextln:  fcmp d0, d1
+; nextln:  b.mi 8
+; nextln:  udf
 ; nextln:  fcvtzs w0, d0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -511,6 +577,17 @@ block0(v0: f64):

 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
+; nextln:  fcmp d0, d0
+; nextln:  b.vc 8
+; nextln:  udf
+; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 -1
+; nextln:  fcmp d0, d1
+; nextln:  b.gt 8
+; nextln:  udf
+; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 18446744073709552000
+; nextln:  fcmp d0, d1
+; nextln:  b.mi 8
+; nextln:  udf
 ; nextln:  fcvtzu x0, d0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -524,6 +601,17 @@ block0(v0: f64):

 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
+; nextln:  fcmp d0, d0
+; nextln:  b.vc 8
+; nextln:  udf
+; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 -9223372036854776000
+; nextln:  fcmp d0, d1
+; nextln:  b.ge 8
+; nextln:  udf
+; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 9223372036854776000
+; nextln:  fcmp d0, d1
+; nextln:  b.mi 8
+; nextln:  udf
 ; nextln:  fcvtzs x0, d0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16