diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index 832058c1e0..e0fc05178d 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -174,6 +174,9 @@ fn expand_sdivrem( return; } + // EBB handling the nominal case. + let nominal = pos.func.dfg.make_ebb(); + // EBB handling the -1 divisor case. let minus_one = pos.func.dfg.make_ebb(); @@ -186,9 +189,11 @@ fn expand_sdivrem( // Start by checking for a -1 divisor which needs to be handled specially. let is_m1 = pos.ins().ifcmp_imm(y, -1); pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]); + pos.ins().jump(nominal, &[]); // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division // by zero. + pos.insert_ebb(nominal); let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y); let divres = if is_srem { rem } else { quot }; @@ -217,6 +222,7 @@ fn expand_sdivrem( pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, nominal); cfg.recompute_ebb(pos.func, minus_one); cfg.recompute_ebb(pos.func, done); } @@ -301,12 +307,18 @@ fn expand_minmax( // fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0. // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical. + // EBB handling case 1) where operands are ordered but not equal. + let one_ebb = func.dfg.make_ebb(); + // EBB handling case 3) where one operand is NaN. let uno_ebb = func.dfg.make_ebb(); // EBB that handles the unordered or equal cases 2) and 3). let ueq_ebb = func.dfg.make_ebb(); + // EBB handling case 2) where operands are ordered and equal. + let eq_ebb = func.dfg.make_ebb(); + // Final EBB with one argument representing the final result value. 
let done = func.dfg.make_ebb(); @@ -327,8 +339,10 @@ fn expand_minmax( pos.use_srcloc(inst); let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y); pos.ins().brnz(cmp_ueq, ueq_ebb, &[]); + pos.ins().jump(one_ebb, &[]); // Handle the common ordered, not equal (LT|GT) case. + pos.insert_ebb(one_ebb); let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0; let one_result = pos.func.dfg.first_result(one_inst); pos.ins().jump(done, &[one_result]); @@ -346,9 +360,11 @@ fn expand_minmax( // TODO: When we get support for flag values, we can reuse the above comparison. let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y); pos.ins().brnz(cmp_uno, uno_ebb, &[]); + pos.ins().jump(eq_ebb, &[]); // We are now in case 2) where x and y compare EQ. // We need a bitwise operation to get the sign right. + pos.insert_ebb(eq_ebb); let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0; let bw_result = pos.func.dfg.first_result(bw_inst); // This should become a fall-through for this second most common case. @@ -360,8 +376,10 @@ fn expand_minmax( pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); - cfg.recompute_ebb(pos.func, ueq_ebb); + cfg.recompute_ebb(pos.func, one_ebb); cfg.recompute_ebb(pos.func, uno_ebb); + cfg.recompute_ebb(pos.func, ueq_ebb); + cfg.recompute_ebb(pos.func, eq_ebb); cfg.recompute_ebb(pos.func, done); } @@ -397,6 +415,9 @@ fn expand_fcvt_from_uint( let old_ebb = pos.func.layout.pp_ebb(inst); + // EBB handling the case where x >= 0. + let poszero_ebb = pos.func.dfg.make_ebb(); + // EBB handling the case where x < 0. let neg_ebb = pos.func.dfg.make_ebb(); @@ -410,8 +431,10 @@ fn expand_fcvt_from_uint( // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction. let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0); pos.ins().brnz(is_neg, neg_ebb, &[]); + pos.ins().jump(poszero_ebb, &[]); // Easy case: just use a signed conversion. 
+ pos.insert_ebb(poszero_ebb); let posres = pos.ins().fcvt_from_sint(ty, x); pos.ins().jump(done, &[posres]); @@ -434,6 +457,7 @@ fn expand_fcvt_from_uint( pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, poszero_ebb); cfg.recompute_ebb(pos.func, neg_ebb); cfg.recompute_ebb(pos.func, done); } @@ -461,6 +485,9 @@ fn expand_fcvt_to_sint( // Final EBB after the bad value checks. let done = func.dfg.make_ebb(); + // EBB for checking failure cases. + let maybe_trap_ebb = func.dfg.make_ebb(); + // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow. // It produces an INT_MIN result instead. func.dfg.replace(inst).x86_cvtt2si(ty, x); @@ -472,6 +499,7 @@ fn expand_fcvt_to_sint( .ins() .icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1)); pos.ins().brnz(is_done, done, &[]); + pos.ins().jump(maybe_trap_ebb, &[]); // We now have the following possibilities: // @@ -479,6 +507,7 @@ fn expand_fcvt_to_sint( // 2. The input was NaN -> trap bad_toint // 3. The input was out of range -> trap int_ovf // + pos.insert_ebb(maybe_trap_ebb); // Check for NaN. let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); @@ -530,6 +559,7 @@ fn expand_fcvt_to_sint( pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, maybe_trap_ebb); cfg.recompute_ebb(pos.func, done); } @@ -559,6 +589,9 @@ fn expand_fcvt_to_sint_sat( // Final EBB after the bad value checks. let done_ebb = func.dfg.make_ebb(); + let intmin_ebb = func.dfg.make_ebb(); + let minsat_ebb = func.dfg.make_ebb(); + let maxsat_ebb = func.dfg.make_ebb(); func.dfg.clear_results(inst); func.dfg.attach_ebb_param(done_ebb, result); @@ -573,20 +606,24 @@ fn expand_fcvt_to_sint_sat( .ins() .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1)); pos.ins().brnz(is_done, done_ebb, &[cvtt2si]); + pos.ins().jump(intmin_ebb, &[]); // We now have the following possibilities: // // 1. 
INT_MIN was actually the correct conversion result. // 2. The input was NaN -> replace the result value with 0. // 3. The input was out of range -> saturate the result to the min/max value. + pos.insert_ebb(intmin_ebb); // Check for NaN, which is truncated to 0. let zero = pos.ins().iconst(ty, 0); let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); pos.ins().brnz(is_nan, done_ebb, &[zero]); + pos.ins().jump(minsat_ebb, &[]); // Check for case 1: INT_MIN is the correct result. // Determine the smallest floating point number that would convert to INT_MIN. + pos.insert_ebb(minsat_ebb); let mut overflow_cc = FloatCC::LessThan; let output_bits = ty.lane_bits(); let flimit = match xty { @@ -623,8 +660,10 @@ fn expand_fcvt_to_sint_sat( }; let min_value = pos.ins().iconst(ty, min_imm); pos.ins().brnz(overflow, done_ebb, &[min_value]); + pos.ins().jump(maxsat_ebb, &[]); // Finally, we could have a positive value that is too large. + pos.insert_ebb(maxsat_ebb); let fzero = match xty { ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), @@ -649,6 +688,9 @@ fn expand_fcvt_to_sint_sat( pos.insert_ebb(done_ebb); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, intmin_ebb); + cfg.recompute_ebb(pos.func, minsat_ebb); + cfg.recompute_ebb(pos.func, maxsat_ebb); cfg.recompute_ebb(pos.func, done_ebb); } @@ -673,6 +715,12 @@ fn expand_fcvt_to_uint( let result = func.dfg.first_result(inst); let ty = func.dfg.value_type(result); + // EBB handling numbers < 2^(N-1). + let below_uint_max_ebb = func.dfg.make_ebb(); + + // EBB handling numbers < 0. + let below_zero_ebb = func.dfg.make_ebb(); + // EBB handling numbers >= 2^(N-1). 
let large = func.dfg.make_ebb(); @@ -696,9 +744,11 @@ fn expand_fcvt_to_uint( let is_large = pos.ins().ffcmp(x, pow2nm1); pos.ins() .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); + pos.ins().jump(below_uint_max_ebb, &[]); // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the // previous comparison. + pos.insert_ebb(below_uint_max_ebb); pos.ins().trapff( FloatCC::Unordered, is_large, @@ -710,6 +760,9 @@ fn expand_fcvt_to_uint( let is_neg = pos.ins().ifcmp_imm(sres, 0); pos.ins() .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); + pos.ins().jump(below_zero_ebb, &[]); + + pos.insert_ebb(below_zero_ebb); pos.ins().trap(ir::TrapCode::IntegerOverflow); // Handle the case where x >= 2^(N-1) and not NaN. @@ -729,6 +782,8 @@ fn expand_fcvt_to_uint( pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, below_uint_max_ebb); + cfg.recompute_ebb(pos.func, below_zero_ebb); cfg.recompute_ebb(pos.func, large); cfg.recompute_ebb(pos.func, done); } @@ -757,9 +812,16 @@ fn expand_fcvt_to_uint_sat( let result = func.dfg.first_result(inst); let ty = func.dfg.value_type(result); + // EBB handling numbers < 2^(N-1). + let below_pow2nm1_or_nan_ebb = func.dfg.make_ebb(); + let below_pow2nm1_ebb = func.dfg.make_ebb(); + // EBB handling numbers >= 2^(N-1). let large = func.dfg.make_ebb(); + // EBB handling numbers < 2^N. + let uint_large_ebb = func.dfg.make_ebb(); + // Final EBB after the bad value checks. let done = func.dfg.make_ebb(); @@ -781,12 +843,16 @@ fn expand_fcvt_to_uint_sat( let is_large = pos.ins().ffcmp(x, pow2nm1); pos.ins() .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); + pos.ins().jump(below_pow2nm1_or_nan_ebb, &[]); // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison. 
+ pos.insert_ebb(below_pow2nm1_or_nan_ebb); pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]); + pos.ins().jump(below_pow2nm1_ebb, &[]); // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're // done; otherwise saturate to the minimum unsigned value, that is 0. + pos.insert_ebb(below_pow2nm1_ebb); let sres = pos.ins().x86_cvtt2si(ty, x); let is_neg = pos.ins().ifcmp_imm(sres, 0); pos.ins() @@ -808,6 +874,9 @@ fn expand_fcvt_to_uint_sat( let is_neg = pos.ins().ifcmp_imm(lres, 0); pos.ins() .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]); + pos.ins().jump(uint_large_ebb, &[]); + + pos.insert_ebb(uint_large_ebb); let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); // Recycle the original instruction as a jump. @@ -818,6 +887,9 @@ fn expand_fcvt_to_uint_sat( pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, below_pow2nm1_or_nan_ebb); + cfg.recompute_ebb(pos.func, below_pow2nm1_ebb); cfg.recompute_ebb(pos.func, large); + cfg.recompute_ebb(pos.func, uint_large_ebb); cfg.recompute_ebb(pos.func, done); } diff --git a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif index 51e7ca487d..747f4e819b 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif @@ -84,7 +84,7 @@ function %f32_min(f32, f32) -> f32 { ebb0(v0: f32, v1: f32): v2 = fmin v0, v1 return v2 - ; check: $(vnat=$V) = x86_fmin v0, v1 + ; check: $(vnat=$V) = x86_fmin.f32 v0, v1 ; nextln: jump $(done=$EBB)($vnat) ; check: $(uno=$EBB):