diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index bdd3458f14..0ed238ac18 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -91,6 +91,25 @@ fn size_plus_maybe_sib_or_offset_for_in_reg_1( sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte_or_offset) } +/// If the value's definition is a constant immediate, returns its unpacked value, or None +/// otherwise. +fn maybe_iconst_imm(pos: &FuncCursor, value: ir::Value) -> Option<i64> { + if let ir::ValueDef::Result(inst, _) = &pos.func.dfg.value_def(value) { + if let ir::InstructionData::UnaryImm { + opcode: ir::Opcode::Iconst, + imm, + } = &pos.func.dfg[*inst] + { + let value: i64 = (*imm).into(); + Some(value) + } else { + None + } + } else { + None + } +} + /// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`. fn expand_sdivrem( inst: ir::Inst, @@ -109,7 +128,7 @@ fn expand_sdivrem( } => (args[0], args[1], true), _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)), }; - let avoid_div_traps = isa.flags().avoid_div_traps(); + let old_ebb = func.layout.pp_ebb(inst); let result = func.dfg.first_result(inst); let ty = func.dfg.value_type(result); @@ -118,6 +137,8 @@ fn expand_sdivrem( pos.use_srcloc(inst); pos.func.dfg.clear_results(inst); + let avoid_div_traps = isa.flags().avoid_div_traps(); + // If we can tolerate native division traps, sdiv doesn't need branching. if !avoid_div_traps && !is_srem { let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); @@ -126,6 +147,32 @@ fn expand_sdivrem( return; } + // Try to remove checks if the input value is an immediate other than 0 or -1. For these two + // immediates, we'd ideally replace conditional traps by traps, but this requires more + // manipulation of the dfg/cfg, which is out of scope here. 
+ let (could_be_zero, could_be_minus_one) = if let Some(imm) = maybe_iconst_imm(&pos, y) { + (imm == 0, imm == -1) + } else { + (true, true) + }; + + // Put in an explicit division-by-zero trap if the environment requires it. + if avoid_div_traps && could_be_zero { + pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); + } + + if !could_be_minus_one { + let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); + let reuse = if is_srem { + [None, Some(result)] + } else { + [Some(result), None] + }; + pos.ins().with_results(reuse).x86_sdivmodx(x, xhi, y); + pos.remove_inst(); + return; + } + // EBB handling the -1 divisor case. let minus_one = pos.func.dfg.make_ebb(); @@ -139,11 +186,6 @@ fn expand_sdivrem( let is_m1 = pos.ins().ifcmp_imm(y, -1); pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]); - // Put in an explicit division-by-zero trap if the environment requires it. - if avoid_div_traps { - pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); - } - // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division // by zero. let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); @@ -206,7 +248,17 @@ fn expand_udivrem( // Put in an explicit division-by-zero trap if the environment requires it. if avoid_div_traps { - pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); + let zero_check = if let Some(imm) = maybe_iconst_imm(&pos, y) { + // Ideally, we'd just replace the conditional trap with a trap when the immediate is + // zero, but this requires more manipulation of the dfg/cfg, which is out of scope + // here. + imm == 0 + } else { + true + }; + if zero_check { + pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); + } } // Now it is safe to execute the `x86_udivmodx` instruction. 
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif index 40c09aee0e..2622ae48f3 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif @@ -19,6 +19,32 @@ ebb0(v0: i64, v1: i64): ; nextln: return $d } +function %udiv_0(i64) -> i64 { +ebb0(v0: i64): + ; check: ebb0( + v1 = iconst.i64 0 + ; nextln: v1 = iconst.i64 0 + v2 = udiv v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $d +} + +function %udiv_minus_1(i64) -> i64 { +ebb0(v0: i64): + ; check: ebb0( + v1 = iconst.i64 -1 + ; nextln: v1 = iconst.i64 -1 + v2 = udiv v0, v1 + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $d +} + function %urem(i64, i64) -> i64 { ebb0(v0: i64, v1: i64): ; check: ebb0( @@ -31,14 +57,74 @@ ebb0(v0: i64, v1: i64): ; nextln: return $r } +function %urem_0(i64) -> i64 { +ebb0(v0: i64): + ; check: ebb0( + v1 = iconst.i64 0 + ; nextln: v1 = iconst.i64 0 + v2 = urem v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $r +} + +function %urem_minus_1(i64) -> i64 { +ebb0(v0: i64): + ; check: ebb0( + v1 = iconst.i64 -1 + ; nextln: v1 = iconst.i64 -1 + v2 = urem v0, v1 + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1 + return v2 + ; nextln: return $r +} + function %sdiv(i64, i64) -> i64 { ebb0(v0: i64, v1: i64): ; check: ebb0( v2 = sdiv v0, v1 - ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 - ; nextln: brif eq $fm1, $(m1=$EBB) ; nextln: $(fz=$V) = ifcmp_imm v1, 0 ; nextln: trapif eq $fz, int_divz + ; nextln: 
$(fm1=$V) = ifcmp_imm v1, -1 + ; nextln: brif eq $fm1, $(m1=$EBB) + ; check: $(hi=$V) = sshr_imm + ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + ; nextln: jump $(done=$EBB)($q) + ; check: $m1: + ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000 + ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin + ; nextln: trapif eq $fm, int_ovf + ; check: $done(v2: i64): + return v2 + ; nextln: return v2 +} + +function %sdiv_0(i64) -> i64 { +ebb0(v0: i64): + ; check: ebb0( + v1 = iconst.i64 0 + ; nextln: v1 = iconst.i64 0 + v2 = sdiv v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; check: $(hi=$V) = sshr_imm + ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + return v2 + ; nextln: return v2 +} + +function %sdiv_minus_1(i64) -> i64 { +ebb0(v0: i64): + ; check: ebb0( + v1 = iconst.i64 -1 + ; nextln: v1 = iconst.i64 -1 + v2 = sdiv v0, v1 + ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 + ; nextln: brif eq $fm1, $(m1=$EBB) ; check: $(hi=$V) = sshr_imm ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 ; nextln: jump $(done=$EBB)($q) @@ -57,6 +143,41 @@ function %srem(i64, i64) -> i64 { ebb0(v0: i64, v1: i64): ; check: ebb0( v2 = srem v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 + ; nextln: brif eq $fm1, $(m1=$EBB) + ; check: $(hi=$V) = sshr_imm + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + ; nextln: jump $(done=$EBB)($r) + ; check: $m1: + ; nextln: $(zero=$V) = iconst.i64 0 + ; nextln: jump $(done=$EBB)($zero) + ; check: $done(v2: i64): + return v2 + ; nextln: return v2 +} + +function %srem_0(i64) -> i64 { +ebb0(v0: i64): + ; check: ebb0( + v1 = iconst.i64 0 + ; nextln: v1 = iconst.i64 0 + v2 = srem v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm v1, 0 + ; nextln: trapif eq $fz, int_divz + ; check: $(hi=$V) = sshr_imm + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 + return v2 + ; nextln: return v2 +} + +function %srem_minus_1(i64) -> i64 { +ebb0(v0: 
i64): + ; check: ebb0( + v1 = iconst.i64 -1 + ; nextln: v1 = iconst.i64 -1 + v2 = srem v0, v1 ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 ; nextln: brif eq $fm1, $(m1=$EBB) ; check: $(hi=$V) = sshr_imm