Implement wasm saturating conversions;

2018-07-27 19:11:39 +02:00
parent 9dbfbbde10
commit f7e481d9ac
5 changed files with 305 additions and 31 deletions
--- a/cranelift/filetests/wasm/conversions.clif
+++ b/cranelift/filetests/wasm/conversions.clif
@@ -69,6 +69,54 @@ ebb0(v0: f64):
    return v1
 }

+function %i32_trunc_s_sat_f32(f32) -> i32 {
+ebb0(v0: f32):
+    v1 = fcvt_to_sint_sat.i32 v0
+    return v1
+}
+
+function %i32_trunc_u_sat_f32(f32) -> i32 {
+ebb0(v0: f32):
+    v1 = fcvt_to_uint_sat.i32 v0
+    return v1
+}
+
+function %i32_trunc_s_sat_f64(f64) -> i32 {
+ebb0(v0: f64):
+    v1 = fcvt_to_sint_sat.i32 v0
+    return v1
+}
+
+function %i32_trunc_u_sat_f64(f64) -> i32 {
+ebb0(v0: f64):
+    v1 = fcvt_to_uint_sat.i32 v0
+    return v1
+}
+
+function %i64_trunc_s_sat_f32(f32) -> i64 {
+ebb0(v0: f32):
+    v1 = fcvt_to_sint_sat.i64 v0
+    return v1
+}
+
+function %i64_trunc_u_sat_f32(f32) -> i64 {
+ebb0(v0: f32):
+    v1 = fcvt_to_uint_sat.i64 v0
+    return v1
+}
+
+function %i64_trunc_s_sat_f64(f64) -> i64 {
+ebb0(v0: f64):
+    v1 = fcvt_to_sint_sat.i64 v0
+    return v1
+}
+
+function %i64_trunc_u_sat_f64(f64) -> i64 {
+ebb0(v0: f64):
+    v1 = fcvt_to_uint_sat.i64 v0
+    return v1
+}
+
 function %f32_trunc_f64(f64) -> f32 {
 ebb0(v0: f64):
    v1 = fdemote.f32 v0
--- a/lib/codegen/meta-python/base/instructions.py
+++ b/lib/codegen/meta-python/base/instructions.py
@@ -1879,6 +1879,14 @@ fcvt_to_uint = Instruction(
        """,
        ins=x, outs=a, can_trap=True)

+fcvt_to_uint_sat = Instruction(
+        'fcvt_to_uint_sat', r"""
+        Convert floating point to unsigned integer as fcvt_to_uint does, but
+        saturates the input instead of trapping. NaN and negative values are
+        converted to 0.
+        """,
+        ins=x, outs=a)
+
 fcvt_to_sint = Instruction(
        'fcvt_to_sint', r"""
        Convert floating point to signed integer.
@@ -1891,6 +1899,13 @@ fcvt_to_sint = Instruction(
        """,
        ins=x, outs=a, can_trap=True)

+fcvt_to_sint_sat = Instruction(
+        'fcvt_to_sint_sat', r"""
+        Convert floating point to signed integer as fcvt_to_sint does, but
+        saturates the input instead of trapping. NaN values are converted to 0.
+        """,
+        ins=x, outs=a)
+
 x = Operand('x', Int)
 a = Operand('a', FloatTo)

--- a/lib/codegen/meta-python/isa/x86/legalize.py
+++ b/lib/codegen/meta-python/isa/x86/legalize.py
@@ -94,9 +94,11 @@ x86_expand.custom_legalize(insts.fmax, 'expand_minmax')

 # Conversions from unsigned need special handling.
 x86_expand.custom_legalize(insts.fcvt_from_uint, 'expand_fcvt_from_uint')
-# Conversions from float to int can trap.
+# Float-to-int conversions trap or saturate; their expansions modify the CFG.
 x86_expand.custom_legalize(insts.fcvt_to_sint, 'expand_fcvt_to_sint')
 x86_expand.custom_legalize(insts.fcvt_to_uint, 'expand_fcvt_to_uint')
+x86_expand.custom_legalize(insts.fcvt_to_sint_sat, 'expand_fcvt_to_sint_sat')
+x86_expand.custom_legalize(insts.fcvt_to_uint_sat, 'expand_fcvt_to_uint_sat')

 # Count leading and trailing zeroes, for baseline x86_64
 c_minus_one = Var('c_minus_one')
--- a/lib/codegen/src/isa/x86/enc_tables.rs
+++ b/lib/codegen/src/isa/x86/enc_tables.rs
@@ -322,14 +322,13 @@ fn expand_fcvt_to_sint(
    use ir::condcodes::{FloatCC, IntCC};
    use ir::immediates::{Ieee32, Ieee64};

-    let x;
-    match func.dfg[inst] {
+    let x = match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToSint,
            arg,
-        } => x = arg,
+        } => arg,
        _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
-    }
+    };
    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
@@ -367,15 +366,16 @@ fn expand_fcvt_to_sint(
    let mut overflow_cc = FloatCC::LessThan;
    let output_bits = ty.lane_bits();
    let flimit = match xty {
-        // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
-        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
-        ir::types::F32 => pos.ins().f32const(if output_bits < 32 {
-            overflow_cc = FloatCC::LessThanOrEqual;
-            Ieee32::fcvt_to_sint_negative_overflow(output_bits)
-        } else {
-            Ieee32::pow2(output_bits - 1).neg()
-        }),
-        ir::types::F64 => {
+        ir::types::F32 =>
+            // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
+            // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+            pos.ins().f32const(if output_bits < 32 {
+                overflow_cc = FloatCC::LessThanOrEqual;
+                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
+            } else {
+                Ieee32::pow2(output_bits - 1).neg()
+            }),
+        ir::types::F64 =>
            // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
            // there are values less than -2^(N-1) that convert correctly to INT_MIN.
            pos.ins().f64const(if output_bits < 64 {
@@ -383,8 +383,7 @@ fn expand_fcvt_to_sint(
                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
            } else {
                Ieee64::pow2(output_bits - 1).neg()
-            })
-        }
+            }),
        _ => panic!("Can't convert {}", xty),
    };
    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
@@ -406,6 +405,122 @@ fn expand_fcvt_to_sint(
    cfg.recompute_ebb(pos.func, done);
 }

+fn expand_fcvt_to_sint_sat(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &isa::TargetIsa,
+) {
+    use ir::condcodes::{FloatCC, IntCC};
+    use ir::immediates::{Ieee32, Ieee64};
+
+    let x = match func.dfg[inst] {
+        ir::InstructionData::Unary {
+            opcode: ir::Opcode::FcvtToSintSat,
+            arg,
+        } => arg,
+        _ => panic!(
+            "Need fcvt_to_sint_sat: {}",
+            func.dfg.display_inst(inst, None)
+        ),
+    };
+
+    let old_ebb = func.layout.pp_ebb(inst);
+    let xty = func.dfg.value_type(x);
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+
+    // Final EBB that every path below branches to; its parameter is the conversion result.
+    let done_ebb = func.dfg.make_ebb();
+    func.dfg.clear_results(inst);
+    func.dfg.attach_ebb_param(done_ebb, result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // `x86_cvtt2si` performs the desired conversion for in-range inputs, but on NaN or
+    // overflow it neither traps nor saturates: it produces an INT_MIN result instead.
+    let cvtt2si = pos.ins().x86_cvtt2si(ty, x);
+
+    let is_done = pos
+        .ins()
+        .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1));
+    pos.ins().brnz(is_done, done_ebb, &[cvtt2si]);
+
+    // The conversion produced INT_MIN, which leaves the following possibilities:
+    //
+    // 1. INT_MIN was actually the correct conversion result.
+    // 2. The input was NaN -> replace the result value with 0.
+    // 3. The input was out of range -> saturate the result to the min/max value.
+
+    // Case 2: NaN compares unordered with itself and is converted to 0.
+    let zero = pos.ins().iconst(ty, 0);
+    let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
+    pos.ins().brnz(is_nan, done_ebb, &[zero]);
+
+    // Case 3, negative side: saturate to INT_MIN when `x` is below the representable range.
+    // Determine the smallest floating point number that would convert to INT_MIN.
+    let mut overflow_cc = FloatCC::LessThan;
+    let output_bits = ty.lane_bits();
+    let flimit = match xty {
+        ir::types::F32 =>
+            // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
+            // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+            pos.ins().f32const(if output_bits < 32 {
+                overflow_cc = FloatCC::LessThanOrEqual;
+                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
+            } else {
+                Ieee32::pow2(output_bits - 1).neg()
+            }),
+        ir::types::F64 =>
+            // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
+            // there are values less than -2^(N-1) that convert correctly to INT_MIN.
+            pos.ins().f64const(if output_bits < 64 {
+                overflow_cc = FloatCC::LessThanOrEqual;
+                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
+            } else {
+                Ieee64::pow2(output_bits - 1).neg()
+            }),
+        _ => panic!("Can't convert {}", xty),
+    };
+
+    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
+    let min_imm = match ty {
+        ir::types::I32 => i32::min_value() as i64,
+        ir::types::I64 => i64::min_value(),
+        _ => panic!("Don't know the min value for {}", ty),
+    };
+    let min_value = pos.ins().iconst(ty, min_imm);
+    pos.ins().brnz(overflow, done_ebb, &[min_value]);
+
+    // Case 3, positive side: any remaining non-negative `x` was too large; saturate to INT_MAX.
+    let fzero = match xty {
+        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
+        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
+        _ => panic!("Can't convert {}", xty),
+    };
+
+    let max_imm = match ty {
+        ir::types::I32 => i32::max_value() as i64,
+        ir::types::I64 => i64::max_value(),
+        _ => panic!("Don't know the max value for {}", ty),
+    };
+    let max_value = pos.ins().iconst(ty, max_imm);
+
+    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
+    pos.ins().brnz(overflow, done_ebb, &[max_value]);
+
+    // Case 1: INT_MIN was the correct result. Recycle the original instruction as the final jump.
+    pos.func.dfg.replace(inst).jump(done_ebb, &[cvtt2si]);
+
+    // Finally insert the completion EBB after the recycled jump.
+    pos.next_inst();
+    pos.insert_ebb(done_ebb);
+
+    cfg.recompute_ebb(pos.func, old_ebb);
+    cfg.recompute_ebb(pos.func, done_ebb);
+}
+
 fn expand_fcvt_to_uint(
    inst: ir::Inst,
    func: &mut ir::Function,
@@ -415,14 +530,14 @@ fn expand_fcvt_to_uint(
    use ir::condcodes::{FloatCC, IntCC};
    use ir::immediates::{Ieee32, Ieee64};

-    let x;
-    match func.dfg[inst] {
+    let x = match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtToUint,
            arg,
-        } => x = arg,
+        } => arg,
        _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
-    }
+    };
+
    let old_ebb = func.layout.pp_ebb(inst);
    let xty = func.dfg.value_type(x);
    let result = func.dfg.first_result(inst);
@@ -487,3 +602,93 @@ fn expand_fcvt_to_uint(
    cfg.recompute_ebb(pos.func, large);
    cfg.recompute_ebb(pos.func, done);
 }
+
+fn expand_fcvt_to_uint_sat(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    cfg: &mut ControlFlowGraph,
+    _isa: &isa::TargetIsa,
+) {
+    use ir::condcodes::{FloatCC, IntCC};
+    use ir::immediates::{Ieee32, Ieee64};
+
+    let x = match func.dfg[inst] {
+        ir::InstructionData::Unary {
+            opcode: ir::Opcode::FcvtToUintSat,
+            arg,
+        } => arg,
+        _ => panic!(
+            "Need fcvt_to_uint_sat: {}",
+            func.dfg.display_inst(inst, None)
+        ),
+    };
+
+    let old_ebb = func.layout.pp_ebb(inst);
+    let xty = func.dfg.value_type(x);
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+
+    // EBB handling numbers >= 2^(N-1), which a signed conversion cannot produce directly.
+    let large = func.dfg.make_ebb();
+
+    // Final EBB that every path below branches to.
+    let done = func.dfg.make_ebb();
+
+    // Move the `inst` result value onto the `done` EBB as its parameter.
+    func.dfg.clear_results(inst);
+    func.dfg.attach_ebb_param(done, result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+
+    // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
+    // the destination integer type.
+    let pow2nm1 = match xty {
+        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
+        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
+        _ => panic!("Can't convert {}", xty),
+    };
+    let zero = pos.ins().iconst(ty, 0);
+    let is_large = pos.ins().ffcmp(x, pow2nm1);
+    pos.ins()
+        .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
+
+    // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison.
+    pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]);
+
+    // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is non-negative,
+    // we're done; otherwise saturate to the minimum unsigned value, that is 0.
+    let sres = pos.ins().x86_cvtt2si(ty, x);
+    let is_neg = pos.ins().ifcmp_imm(sres, 0);
+    pos.ins()
+        .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
+    pos.ins().jump(done, &[zero]);
+
+    // Handle the case where x >= 2^(N-1) and not NaN: convert x - 2^(N-1), then add 2^(N-1) back.
+    pos.insert_ebb(large);
+    let adjx = pos.ins().fsub(x, pow2nm1);
+    let lres = pos.ins().x86_cvtt2si(ty, adjx);
+    let max_value = pos.ins().iconst(
+        ty,
+        match ty {
+            ir::types::I32 => u32::max_value() as i64,
+            ir::types::I64 => u64::max_value() as i64,
+            _ => panic!("Can't convert {}", ty),
+        },
+    );
+    let is_neg = pos.ins().ifcmp_imm(lres, 0);
+    pos.ins()
+        .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]);
+    let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
+
+    // Otherwise recycle the original instruction as a jump carrying the re-biased result.
+    pos.func.dfg.replace(inst).jump(done, &[lfinal]);
+
+    // Finally insert the completion EBB after the recycled jump.
+    pos.next_inst();
+    pos.insert_ebb(done);
+
+    cfg.recompute_ebb(pos.func, old_ebb);
+    cfg.recompute_ebb(pos.func, large);
+    cfg.recompute_ebb(pos.func, done);
+}
--- a/lib/wasm/src/code_translator.rs
+++ b/lib/wasm/src/code_translator.rs
@@ -631,17 +631,21 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
            let val = state.pop1();
            state.push1(builder.ins().fcvt_to_uint(I32, val));
        }
-        Operator::I64TruncSSatF64
-        | Operator::I64TruncSSatF32
-        | Operator::I32TruncSSatF64
-        | Operator::I32TruncSSatF32
-        | Operator::I64TruncUSatF64
-        | Operator::I64TruncUSatF32
-        | Operator::I32TruncUSatF64
-        | Operator::I32TruncUSatF32 => {
-            return Err(WasmError::Unsupported(
-                "proposed saturating conversion operators",
-            ));
+        Operator::I64TruncSSatF64 | Operator::I64TruncSSatF32 => {
+            let val = state.pop1();
+            state.push1(builder.ins().fcvt_to_sint_sat(I64, val));
+        }
+        Operator::I32TruncSSatF64 | Operator::I32TruncSSatF32 => {
+            let val = state.pop1();
+            state.push1(builder.ins().fcvt_to_sint_sat(I32, val));
+        }
+        Operator::I64TruncUSatF64 | Operator::I64TruncUSatF32 => {
+            let val = state.pop1();
+            state.push1(builder.ins().fcvt_to_uint_sat(I64, val));
+        }
+        Operator::I32TruncUSatF64 | Operator::I32TruncUSatF32 => {
+            let val = state.pop1();
+            state.push1(builder.ins().fcvt_to_uint_sat(I32, val));
        }
        Operator::F32ReinterpretI32 => {
            let val = state.pop1();