diff --git a/cranelift/filetests/wasm/conversions.clif b/cranelift/filetests/wasm/conversions.clif index 0b7630da0d..6784637136 100644 --- a/cranelift/filetests/wasm/conversions.clif +++ b/cranelift/filetests/wasm/conversions.clif @@ -69,6 +69,54 @@ ebb0(v0: f64): return v1 } +function %i32_trunc_s_sat_f32(f32) -> i32 { +ebb0(v0: f32): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +function %i32_trunc_u_sat_f32(f32) -> i32 { +ebb0(v0: f32): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +function %i32_trunc_s_sat_f64(f64) -> i32 { +ebb0(v0: f64): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +function %i32_trunc_u_sat_f64(f64) -> i32 { +ebb0(v0: f64): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +function %i64_trunc_s_sat_f32(f32) -> i64 { +ebb0(v0: f32): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +function %i64_trunc_u_sat_f32(f32) -> i64 { +ebb0(v0: f32): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +function %i64_trunc_s_sat_f64(f64) -> i64 { +ebb0(v0: f64): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +function %i64_trunc_u_sat_f64(f64) -> i64 { +ebb0(v0: f64): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + function %f32_trunc_f64(f64) -> f32 { ebb0(v0: f64): v1 = fdemote.f32 v0 diff --git a/lib/codegen/meta-python/base/instructions.py b/lib/codegen/meta-python/base/instructions.py index 84c704f888..25b8ba414b 100644 --- a/lib/codegen/meta-python/base/instructions.py +++ b/lib/codegen/meta-python/base/instructions.py @@ -1879,6 +1879,14 @@ fcvt_to_uint = Instruction( """, ins=x, outs=a, can_trap=True) +fcvt_to_uint_sat = Instruction( + 'fcvt_to_uint_sat', r""" + Convert floating point to unsigned integer as fcvt_to_uint does, but + saturates the input instead of trapping. NaN and negative values are + converted to 0. + """, + ins=x, outs=a) + fcvt_to_sint = Instruction( 'fcvt_to_sint', r""" Convert floating point to signed integer. @@ -1891,6 +1899,13 @@ fcvt_to_sint = Instruction( """, ins=x, outs=a, can_trap=True) +fcvt_to_sint_sat = Instruction( + 'fcvt_to_sint_sat', r""" + Convert floating point to signed integer as fcvt_to_sint does, but + saturates the input instead of trapping. NaN values are converted to 0. + """, + ins=x, outs=a) + x = Operand('x', Int) a = Operand('a', FloatTo) diff --git a/lib/codegen/meta-python/isa/x86/legalize.py b/lib/codegen/meta-python/isa/x86/legalize.py index 6da7d53340..15f08a07b9 100644 --- a/lib/codegen/meta-python/isa/x86/legalize.py +++ b/lib/codegen/meta-python/isa/x86/legalize.py @@ -94,9 +94,11 @@ x86_expand.custom_legalize(insts.fmax, 'expand_minmax') # Conversions from unsigned need special handling. x86_expand.custom_legalize(insts.fcvt_from_uint, 'expand_fcvt_from_uint') -# Conversions from float to int can trap. +# Conversions from float to int can trap and modify the control flow graph. x86_expand.custom_legalize(insts.fcvt_to_sint, 'expand_fcvt_to_sint') x86_expand.custom_legalize(insts.fcvt_to_uint, 'expand_fcvt_to_uint') +x86_expand.custom_legalize(insts.fcvt_to_sint_sat, 'expand_fcvt_to_sint_sat') +x86_expand.custom_legalize(insts.fcvt_to_uint_sat, 'expand_fcvt_to_uint_sat') # Count leading and trailing zeroes, for baseline x86_64 c_minus_one = Var('c_minus_one') diff --git a/lib/codegen/src/isa/x86/enc_tables.rs b/lib/codegen/src/isa/x86/enc_tables.rs index 03c33f52c8..52af977ad6 100644 --- a/lib/codegen/src/isa/x86/enc_tables.rs +++ b/lib/codegen/src/isa/x86/enc_tables.rs @@ -322,14 +322,13 @@ fn expand_fcvt_to_sint( use ir::condcodes::{FloatCC, IntCC}; use ir::immediates::{Ieee32, Ieee64}; - let x; - match func.dfg[inst] { + let x = match func.dfg[inst] { ir::InstructionData::Unary { opcode: ir::Opcode::FcvtToSint, arg, - } => x = arg, + } => arg, _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)), - } + }; let old_ebb = func.layout.pp_ebb(inst); let xty = func.dfg.value_type(x); let result = func.dfg.first_result(inst); @@ -367,15 +366,16 @@ fn expand_fcvt_to_sint( let mut overflow_cc = FloatCC::LessThan; let output_bits = ty.lane_bits(); let flimit = match xty { - // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so - // there are values less than -2^(N-1) that convert correctly to INT_MIN. - ir::types::F32 => pos.ins().f32const(if output_bits < 32 { - overflow_cc = FloatCC::LessThanOrEqual; - Ieee32::fcvt_to_sint_negative_overflow(output_bits) - } else { - Ieee32::pow2(output_bits - 1).neg() - }), - ir::types::F64 => { + ir::types::F32 => + // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so + // there are values less than -2^(N-1) that convert correctly to INT_MIN. + pos.ins().f32const(if output_bits < 32 { + overflow_cc = FloatCC::LessThanOrEqual; + Ieee32::fcvt_to_sint_negative_overflow(output_bits) + } else { + Ieee32::pow2(output_bits - 1).neg() + }), + ir::types::F64 => // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so // there are values less than -2^(N-1) that convert correctly to INT_MIN. pos.ins().f64const(if output_bits < 64 { @@ -383,8 +383,7 @@ fn expand_fcvt_to_sint( Ieee64::fcvt_to_sint_negative_overflow(output_bits) } else { Ieee64::pow2(output_bits - 1).neg() - }) - } + }), _ => panic!("Can't convert {}", xty), }; let overflow = pos.ins().fcmp(overflow_cc, x, flimit); @@ -406,6 +405,122 @@ fn expand_fcvt_to_sint( cfg.recompute_ebb(pos.func, done); } +fn expand_fcvt_to_sint_sat( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + _isa: &isa::TargetIsa, +) { + use ir::condcodes::{FloatCC, IntCC}; + use ir::immediates::{Ieee32, Ieee64}; + + let x = match func.dfg[inst] { + ir::InstructionData::Unary { + opcode: ir::Opcode::FcvtToSintSat, + arg, + } => arg, + _ => panic!( + "Need fcvt_to_sint_sat: {}", + func.dfg.display_inst(inst, None) + ), + }; + + let old_ebb = func.layout.pp_ebb(inst); + let xty = func.dfg.value_type(x); + let result = func.dfg.first_result(inst); + let ty = func.dfg.value_type(result); + + // Final EBB after the bad value checks. + let done_ebb = func.dfg.make_ebb(); + func.dfg.clear_results(inst); + func.dfg.attach_ebb_param(done_ebb, result); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or + // overflow. It produces an INT_MIN result instead. + let cvtt2si = pos.ins().x86_cvtt2si(ty, x); + + let is_done = pos + .ins() + .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1)); + pos.ins().brnz(is_done, done_ebb, &[cvtt2si]); + + // We now have the following possibilities: + // + // 1. INT_MIN was actually the correct conversion result. + // 2. The input was NaN -> replace the result value with 0. + // 3. The input was out of range -> saturate the result to the min/max value. + + // Check for NaN, which is truncated to 0. + let zero = pos.ins().iconst(ty, 0); + let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); + pos.ins().brnz(is_nan, done_ebb, &[zero]); + + // Check for case 1: INT_MIN is the correct result. + // Determine the smallest floating point number that would convert to INT_MIN. + let mut overflow_cc = FloatCC::LessThan; + let output_bits = ty.lane_bits(); + let flimit = match xty { + ir::types::F32 => + // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so + // there are values less than -2^(N-1) that convert correctly to INT_MIN. + pos.ins().f32const(if output_bits < 32 { + overflow_cc = FloatCC::LessThanOrEqual; + Ieee32::fcvt_to_sint_negative_overflow(output_bits) + } else { + Ieee32::pow2(output_bits - 1).neg() + }), + ir::types::F64 => + // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so + // there are values less than -2^(N-1) that convert correctly to INT_MIN. + pos.ins().f64const(if output_bits < 64 { + overflow_cc = FloatCC::LessThanOrEqual; + Ieee64::fcvt_to_sint_negative_overflow(output_bits) + } else { + Ieee64::pow2(output_bits - 1).neg() + }), + _ => panic!("Can't convert {}", xty), + }; + + let overflow = pos.ins().fcmp(overflow_cc, x, flimit); + let min_imm = match ty { + ir::types::I32 => i32::min_value() as i64, + ir::types::I64 => i64::min_value(), + _ => panic!("Don't know the min value for {}", ty), + }; + let min_value = pos.ins().iconst(ty, min_imm); + pos.ins().brnz(overflow, done_ebb, &[min_value]); + + // Finally, we could have a positive value that is too large. + let fzero = match xty { + ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), + ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), + _ => panic!("Can't convert {}", xty), + }; + + let max_imm = match ty { + ir::types::I32 => i32::max_value() as i64, + ir::types::I64 => i64::max_value(), + _ => panic!("Don't know the max value for {}", ty), + }; + let max_value = pos.ins().iconst(ty, max_imm); + + let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero); + pos.ins().brnz(overflow, done_ebb, &[max_value]); + + // Recycle the original instruction. + pos.func.dfg.replace(inst).jump(done_ebb, &[cvtt2si]); + + // Finally insert a label for the completion. + pos.next_inst(); + pos.insert_ebb(done_ebb); + + cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, done_ebb); +} + fn expand_fcvt_to_uint( inst: ir::Inst, func: &mut ir::Function, @@ -415,14 +530,14 @@ fn expand_fcvt_to_uint( use ir::condcodes::{FloatCC, IntCC}; use ir::immediates::{Ieee32, Ieee64}; - let x; - match func.dfg[inst] { + let x = match func.dfg[inst] { ir::InstructionData::Unary { opcode: ir::Opcode::FcvtToUint, arg, - } => x = arg, + } => arg, _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)), - } + }; + let old_ebb = func.layout.pp_ebb(inst); let xty = func.dfg.value_type(x); let result = func.dfg.first_result(inst); @@ -487,3 +602,93 @@ fn expand_fcvt_to_uint( cfg.recompute_ebb(pos.func, large); cfg.recompute_ebb(pos.func, done); } + +fn expand_fcvt_to_uint_sat( + inst: ir::Inst, + func: &mut ir::Function, + cfg: &mut ControlFlowGraph, + _isa: &isa::TargetIsa, +) { + use ir::condcodes::{FloatCC, IntCC}; + use ir::immediates::{Ieee32, Ieee64}; + + let x = match func.dfg[inst] { + ir::InstructionData::Unary { + opcode: ir::Opcode::FcvtToUintSat, + arg, + } => arg, + _ => panic!( + "Need fcvt_to_uint_sat: {}", + func.dfg.display_inst(inst, None) + ), + }; + + let old_ebb = func.layout.pp_ebb(inst); + let xty = func.dfg.value_type(x); + let result = func.dfg.first_result(inst); + let ty = func.dfg.value_type(result); + + // EBB handling numbers >= 2^(N-1). + let large = func.dfg.make_ebb(); + + // Final EBB after the bad value checks. + let done = func.dfg.make_ebb(); + + // Move the `inst` result value onto the `done` EBB. + func.dfg.clear_results(inst); + func.dfg.attach_ebb_param(done, result); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in + // the destination integer type. + let pow2nm1 = match xty { + ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)), + ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)), + _ => panic!("Can't convert {}", xty), + }; + let zero = pos.ins().iconst(ty, 0); + let is_large = pos.ins().ffcmp(x, pow2nm1); + pos.ins() + .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); + + // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison. + pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]); + + // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're + // done; otherwise saturate to the minimum unsigned value, that is 0. + let sres = pos.ins().x86_cvtt2si(ty, x); + let is_neg = pos.ins().ifcmp_imm(sres, 0); + pos.ins() + .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); + pos.ins().jump(done, &[zero]); + + // Handle the case where x >= 2^(N-1) and not NaN. + pos.insert_ebb(large); + let adjx = pos.ins().fsub(x, pow2nm1); + let lres = pos.ins().x86_cvtt2si(ty, adjx); + let max_value = pos.ins().iconst( + ty, + match ty { + ir::types::I32 => u32::max_value() as i64, + ir::types::I64 => u64::max_value() as i64, + _ => panic!("Can't convert {}", ty), + }, + ); + let is_neg = pos.ins().ifcmp_imm(lres, 0); + pos.ins() + .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]); + let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); + + // Recycle the original instruction as a jump. + pos.func.dfg.replace(inst).jump(done, &[lfinal]); + + // Finally insert a label for the completion. + pos.next_inst(); + pos.insert_ebb(done); + + cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, large); + cfg.recompute_ebb(pos.func, done); +} diff --git a/lib/wasm/src/code_translator.rs b/lib/wasm/src/code_translator.rs index dacae0a2c9..e158b76c08 100644 --- a/lib/wasm/src/code_translator.rs +++ b/lib/wasm/src/code_translator.rs @@ -631,17 +631,21 @@ pub fn translate_operator( let val = state.pop1(); state.push1(builder.ins().fcvt_to_uint(I32, val)); } - Operator::I64TruncSSatF64 - | Operator::I64TruncSSatF32 - | Operator::I32TruncSSatF64 - | Operator::I32TruncSSatF32 - | Operator::I64TruncUSatF64 - | Operator::I64TruncUSatF32 - | Operator::I32TruncUSatF64 - | Operator::I32TruncUSatF32 => { - return Err(WasmError::Unsupported( - "proposed saturating conversion operators", - )); + Operator::I64TruncSSatF64 | Operator::I64TruncSSatF32 => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint_sat(I64, val)); + } + Operator::I32TruncSSatF64 | Operator::I32TruncSSatF32 => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_sint_sat(I32, val)); + } + Operator::I64TruncUSatF64 | Operator::I64TruncUSatF32 => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint_sat(I64, val)); + } + Operator::I32TruncUSatF64 | Operator::I32TruncUSatF32 => { + let val = state.pop1(); + state.push1(builder.ins().fcvt_to_uint_sat(I32, val)); } Operator::F32ReinterpretI32 => { let val = state.pop1();