aarch64: implement correct float-to-int conversion semantics

These are inherited from wasm semantics.
2020-04-22 19:10:22 +02:00
parent cb896e0be3
commit de92b7e014
3 changed files with 205 additions and 1 deletions
--- a/build.rs
+++ b/build.rs
@@ -242,7 +242,6 @@ fn should_panic(testsuite: &str, testname: &str) -> bool {
        | ("simd", _)
        | ("multi_value", "call")
        | ("spec_testsuite", "call")
        | ("spec_testsuite", "conversions")
        | ("spec_testsuite", "i32")
        | ("spec_testsuite", "i64")
        | ("spec_testsuite", "int_exprs")
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -2151,8 +2151,125 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
                (true, 64, 64) => FpuToIntOp::F64ToI64,
                _ => panic!("Unknown input/output-bits combination"),
            };
            let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
            let rd = output_to_reg(ctx, outputs[0]);
            // First, check the input: it's important to carry out the NaN check before the
            // in-bounds checks, per wasm semantics.
            // Check that the input is not a NaN.
            if in_bits == 32 {
                ctx.emit(Inst::FpuCmp32 { rn, rm: rn });
            } else {
                ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
            }
            ctx.emit(Inst::CondBrLowered {
                target: BranchTarget::ResolvedOffset(8),
                kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Ordered)),
            });
            let trap_info = (ctx.srcloc(insn), TrapCode::BadConversionToInteger);
            ctx.emit(Inst::Udf { trap_info });
            let tmp = ctx.tmp(RegClass::V128, I128);
            // Check that the input is in range, with "truncate towards zero" semantics. This means
            // we allow values that are slightly out of range:
            // - for signed conversions, we allow values strictly greater than INT_MIN-1 (when this
            // can be represented), and strictly less than INT_MAX+1 (when this can be
            // represented).
            // - for unsigned conversions, we allow values strictly greater than -1, and strictly
            // less than UINT_MAX+1 (when this can be represented).
            if in_bits == 32 {
                // From float32.
                let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
                    (true, 32) => (
                        i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
                        FloatCC::GreaterThanOrEqual,
                        i32::max_value() as f32 + 1.,
                    ),
                    (true, 64) => (
                        i64::min_value() as f32, // I64_MIN - 1 isn't precisely representable as a f32.
                        FloatCC::GreaterThanOrEqual,
                        i64::max_value() as f32 + 1.,
                    ),
                    (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
                    (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
                    _ => panic!("Unknown input/output-bits combination"),
                };
                // > low_bound or >= low_bound, depending on low_cond
                lower_constant_f32(ctx, tmp, low_bound);
                ctx.emit(Inst::FpuCmp32 {
                    rn,
                    rm: tmp.to_reg(),
                });
                ctx.emit(Inst::CondBrLowered {
                    target: BranchTarget::ResolvedOffset(8),
                    kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
                });
                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
                ctx.emit(Inst::Udf { trap_info });
                // < high_bound
                lower_constant_f32(ctx, tmp, high_bound);
                ctx.emit(Inst::FpuCmp32 {
                    rn,
                    rm: tmp.to_reg(),
                });
                ctx.emit(Inst::CondBrLowered {
                    target: BranchTarget::ResolvedOffset(8),
                    kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
                });
                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
                ctx.emit(Inst::Udf { trap_info });
            } else {
                // From float64.
                let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
                    (true, 32) => (
                        i32::min_value() as f64 - 1.,
                        FloatCC::GreaterThan,
                        i32::max_value() as f64 + 1.,
                    ),
                    (true, 64) => (
                        i64::min_value() as f64, // I64_MIN - 1 is not precisely representable as an f64.
                        FloatCC::GreaterThanOrEqual,
                        i64::max_value() as f64 + 1.,
                    ),
                    (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
                    (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
                    _ => panic!("Unknown input/output-bits combination"),
                };
                // > low_bound or >= low_bound, depending on low_cond
                lower_constant_f64(ctx, tmp, low_bound);
                ctx.emit(Inst::FpuCmp64 {
                    rn,
                    rm: tmp.to_reg(),
                });
                ctx.emit(Inst::CondBrLowered {
                    target: BranchTarget::ResolvedOffset(8),
                    kind: CondBrKind::Cond(lower_fp_condcode(low_cond)),
                });
                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
                ctx.emit(Inst::Udf { trap_info });
                // < high_bound
                lower_constant_f64(ctx, tmp, high_bound);
                ctx.emit(Inst::FpuCmp64 {
                    rn,
                    rm: tmp.to_reg(),
                });
                ctx.emit(Inst::CondBrLowered {
                    target: BranchTarget::ResolvedOffset(8),
                    kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan)),
                });
                let trap_info = (ctx.srcloc(insn), TrapCode::IntegerOverflow);
                ctx.emit(Inst::Udf { trap_info });
            };
            // Do the conversion.
            ctx.emit(Inst::FpuToInt { op, rd, rn });
        }
--- a/cranelift/filetests/filetests/vcode/aarch64/floating-point.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/floating-point.clif
@@ -433,6 +433,17 @@ block0(v0: f32):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  fcmp s0, s0
 ; nextln:  b.vc 8
 ; nextln:  udf
 ; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 -1
 ; nextln:  fcmp s0, s1
 ; nextln:  b.gt 8
 ; nextln:  udf
 ; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 4294967300
 ; nextln:  fcmp s0, s1
 ; nextln:  b.mi 8
 ; nextln:  udf
 ; nextln:  fcvtzu w0, s0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -446,6 +457,17 @@ block0(v0: f32):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  fcmp s0, s0
 ; nextln:  b.vc 8
 ; nextln:  udf
 ; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 -2147483600
 ; nextln:  fcmp s0, s1
 ; nextln:  b.ge 8
 ; nextln:  udf
 ; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 2147483600
 ; nextln:  fcmp s0, s1
 ; nextln:  b.mi 8
 ; nextln:  udf
 ; nextln:  fcvtzs w0, s0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -459,6 +481,17 @@ block0(v0: f32):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  fcmp s0, s0
 ; nextln:  b.vc 8
 ; nextln:  udf
 ; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 -1
 ; nextln:  fcmp s0, s1
 ; nextln:  b.gt 8
 ; nextln:  udf
 ; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 18446744000000000000
 ; nextln:  fcmp s0, s1
 ; nextln:  b.mi 8
 ; nextln:  udf
 ; nextln:  fcvtzu x0, s0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -472,6 +505,17 @@ block0(v0: f32):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  fcmp s0, s0
 ; nextln:  b.vc 8
 ; nextln:  udf
 ; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 -9223372000000000000
 ; nextln:  fcmp s0, s1
 ; nextln:  b.ge 8
 ; nextln:  udf
 ; nextln:  ldr s1, pc+8 ; b 8 ; data.f32 9223372000000000000
 ; nextln:  fcmp s0, s1
 ; nextln:  b.mi 8
 ; nextln:  udf
 ; nextln:  fcvtzs x0, s0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -485,6 +529,17 @@ block0(v0: f64):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  fcmp d0, d0
 ; nextln:  b.vc 8
 ; nextln:  udf
 ; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 -1
 ; nextln:  fcmp d0, d1
 ; nextln:  b.gt 8
 ; nextln:  udf
 ; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 4294967296
 ; nextln:  fcmp d0, d1
 ; nextln:  b.mi 8
 ; nextln:  udf
 ; nextln:  fcvtzu w0, d0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -498,6 +553,17 @@ block0(v0: f64):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  fcmp d0, d0
 ; nextln:  b.vc 8
 ; nextln:  udf
 ; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 -2147483649
 ; nextln:  fcmp d0, d1
 ; nextln:  b.gt 8
 ; nextln:  udf
 ; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 2147483648
 ; nextln:  fcmp d0, d1
 ; nextln:  b.mi 8
 ; nextln:  udf
 ; nextln:  fcvtzs w0, d0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -511,6 +577,17 @@ block0(v0: f64):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  fcmp d0, d0
 ; nextln:  b.vc 8
 ; nextln:  udf
 ; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 -1
 ; nextln:  fcmp d0, d1
 ; nextln:  b.gt 8
 ; nextln:  udf
 ; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 18446744073709552000
 ; nextln:  fcmp d0, d1
 ; nextln:  b.mi 8
 ; nextln:  udf
 ; nextln:  fcvtzu x0, d0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16
@@ -524,6 +601,17 @@ block0(v0: f64):
 ; check:  stp fp, lr, [sp, #-16]!
 ; nextln:  mov fp, sp
 ; nextln:  fcmp d0, d0
 ; nextln:  b.vc 8
 ; nextln:  udf
 ; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 -9223372036854776000
 ; nextln:  fcmp d0, d1
 ; nextln:  b.ge 8
 ; nextln:  udf
 ; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 9223372036854776000
 ; nextln:  fcmp d0, d1
 ; nextln:  b.mi 8
 ; nextln:  udf
 ; nextln:  fcvtzs x0, d0
 ; nextln:  mov sp, fp
 ; nextln:  ldp fp, lr, [sp], #16