diff --git a/cranelift/filetests/isa/intel/legalize-div-traps.cton b/cranelift/filetests/isa/intel/legalize-div-traps.cton new file mode 100644 index 0000000000..0da41bc5ad --- /dev/null +++ b/cranelift/filetests/isa/intel/legalize-div-traps.cton @@ -0,0 +1,71 @@ +; Test the division legalizations. +test legalizer +set is_64bit +; See also legalize-div.cton. +set avoid_div_traps=1 +isa intel + +; regex: V=v\d+ +; regex: EBB=ebb\d+ + +function %udiv(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): + ; check: $ebb0( + v2 = udiv v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm $v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1 + return v2 + ; nextln: return $d +} + +function %urem(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): + ; check: $ebb0( + v2 = urem v0, v1 + ; nextln: $(fz=$V) = ifcmp_imm $v1, 0 + ; nextln: trapif eq $fz, int_divz + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1 + return v2 + ; nextln: return $r +} + +function %sdiv(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): + ; check: $ebb0( + v2 = sdiv v0, v1 + ; nextln: $(fm1=$V) = ifcmp_imm $v1, -1 + ; nextln: brif eq $fm1, $(m1=$EBB) + ; nextln: $(fz=$V) = ifcmp_imm $v1, 0 + ; nextln: trapif eq $fz, int_divz + ; check: $(hi=$V) = sshr + ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1 + ; nextln: jump $(done=$EBB)($q) + ; check: $m1: + ; nextln: $(fm=$V) = ifcmp_imm.i64 $v0, 0x8000_0000_0000_0000 + ; nextln: trapif eq $fm, int_ovf + ; check: $done($v2: i64): + return v2 + ; nextln: return $v2 +} + +; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. +; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. +function %srem(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): + ; check: $ebb0( + v2 = srem v0, v1 + ; nextln: $(fm1=$V) = ifcmp_imm $v1, -1 + ; nextln: brif eq $fm1, $(m1=$EBB) + ; check: $(hi=$V) = sshr + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1 + ; nextln: jump $(done=$EBB)($r) + ; check: $m1: + ; nextln: $(zero=$V) = iconst.i64 0 + ; nextln: jump $(done=$EBB)($zero) + ; check: $done($v2: i64): + return v2 + ; nextln: return $v2 +} diff --git a/cranelift/filetests/isa/intel/legalize-div.cton b/cranelift/filetests/isa/intel/legalize-div.cton new file mode 100644 index 0000000000..8e34c01b86 --- /dev/null +++ b/cranelift/filetests/isa/intel/legalize-div.cton @@ -0,0 +1,58 @@ +; Test the division legalizations. +test legalizer +set is_64bit +; See also legalize-div-traps.cton. +set avoid_div_traps=0 +isa intel + +; regex: V=v\d+ +; regex: EBB=ebb\d+ + +function %udiv(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): + ; check: $ebb0( + v2 = udiv v0, v1 + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1 + return v2 + ; nextln: return $d +} + +function %urem(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): + ; check: $ebb0( + v2 = urem v0, v1 + ; nextln: $(hi=$V) = iconst.i64 0 + ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1 + return v2 + ; nextln: return $r +} + +function %sdiv(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): + ; check: $ebb0( + v2 = sdiv v0, v1 + ; check: $(hi=$V) = sshr + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1 + return v2 + ; nextln: return $d +} + +; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1. +; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern. +function %srem(i64, i64) -> i64 { +ebb0(v0: i64, v1: i64): + ; check: $ebb0( + v2 = srem v0, v1 + ; nextln: $(fm1=$V) = ifcmp_imm $v1, -1 + ; nextln: brif eq $fm1, $(m1=$EBB) + ; check: $(hi=$V) = sshr + ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1 + ; nextln: jump $(done=$EBB)($r) + ; check: $m1: + ; nextln: $(zero=$V) = iconst.i64 0 + ; nextln: jump $(done=$EBB)($zero) + ; check: $done($v2: i64): + return v2 + ; nextln: return $v2 +} diff --git a/lib/cretonne/meta/base/settings.py b/lib/cretonne/meta/base/settings.py index 68df47d4eb..8d0522af79 100644 --- a/lib/cretonne/meta/base/settings.py +++ b/lib/cretonne/meta/base/settings.py @@ -41,6 +41,18 @@ return_at_end = BoolSetting( instruction at the end. """) +avoid_div_traps = BoolSetting( + """ + Generate explicit checks around native division instructions to avoid + their trapping. + + This is primarily used by SpiderMonkey which doesn't install a signal + handler for SIGFPE, but expects a SIGILL trap for division by zero. + + On ISAs like ARM where the native division instructions don't trap, + this setting has no effect - explicit checks are always inserted. + """) + is_compressed = BoolSetting("Enable compressed instructions") enable_float = BoolSetting( diff --git a/lib/cretonne/meta/isa/intel/legalize.py b/lib/cretonne/meta/isa/intel/legalize.py index f6e5b1dc9d..32f0a98153 100644 --- a/lib/cretonne/meta/isa/intel/legalize.py +++ b/lib/cretonne/meta/isa/intel/legalize.py @@ -5,7 +5,6 @@ from __future__ import absolute_import from cdsl.ast import Var from cdsl.xform import Rtl, XFormGroup from base.immediates import imm64, intcc, floatcc -from base.types import i32, i64 from base import legalize as shared from base import instructions as insts from . import instructions as x86 @@ -31,31 +30,12 @@ a2 = Var('a2') # # Division and remainder. # -intel_expand.legalize( - a << insts.udiv(x, y), - Rtl( - xhi << insts.iconst(imm64(0)), - (a, dead) << x86.udivmodx(x, xhi, y) - )) - -intel_expand.legalize( - a << insts.urem(x, y), - Rtl( - xhi << insts.iconst(imm64(0)), - (dead, a) << x86.udivmodx(x, xhi, y) - )) - -for ty in [i32, i64]: - intel_expand.legalize( - a << insts.sdiv.bind(ty)(x, y), - Rtl( - xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)), - (a, dead) << x86.sdivmodx(x, xhi, y) - )) - # The srem expansion requires custom code because srem INT_MIN, -1 is not -# allowed to trap. -intel_expand.custom_legalize(insts.srem, 'expand_srem') +# allowed to trap. The other ops need to check avoid_div_traps. +intel_expand.custom_legalize(insts.sdiv, 'expand_sdivrem') +intel_expand.custom_legalize(insts.srem, 'expand_sdivrem') +intel_expand.custom_legalize(insts.udiv, 'expand_udivrem') +intel_expand.custom_legalize(insts.urem, 'expand_udivrem') # Floating point condition codes. # diff --git a/lib/cretonne/src/isa/intel/enc_tables.rs b/lib/cretonne/src/isa/intel/enc_tables.rs index a97607c4c7..98de07db48 100644 --- a/lib/cretonne/src/isa/intel/enc_tables.rs +++ b/lib/cretonne/src/isa/intel/enc_tables.rs @@ -1,9 +1,9 @@ //! Encoding tables for Intel ISAs. -use bitset::BitSet; use cursor::{Cursor, FuncCursor}; use flowgraph::ControlFlowGraph; use ir::{self, InstBuilder}; +use ir::condcodes::IntCC; use isa::constraints::*; use isa::enc_tables::*; use isa::encoding::RecipeSizing; @@ -14,55 +14,87 @@ use super::registers::*; include!(concat!(env!("OUT_DIR"), "/encoding-intel.rs")); include!(concat!(env!("OUT_DIR"), "/legalize-intel.rs")); -/// Expand the `srem` instruction using `x86_sdivmodx`. -fn expand_srem( +/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`. +fn expand_sdivrem( inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph, - _isa: &isa::TargetIsa, + isa: &isa::TargetIsa, ) { - use ir::condcodes::IntCC; - let (x, y) = match func.dfg[inst] { + let (x, y, is_srem) = match func.dfg[inst] { + ir::InstructionData::Binary { + opcode: ir::Opcode::Sdiv, + args, + } => (args[0], args[1], false), ir::InstructionData::Binary { opcode: ir::Opcode::Srem, args, - } => (args[0], args[1]), - _ => panic!("Need srem: {}", func.dfg.display_inst(inst, None)), + } => (args[0], args[1], true), + _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)), }; + let avoid_div_traps = isa.flags().avoid_div_traps(); let old_ebb = func.layout.pp_ebb(inst); - - // EBB handling the -1 divisor case. - let minus_one = func.dfg.make_ebb(); - - // Final EBB with one argument representing the final result value. - let done = func.dfg.make_ebb(); - - // Move the `inst` result value onto the `done` EBB. let result = func.dfg.first_result(inst); let ty = func.dfg.value_type(result); - func.dfg.clear_results(inst); - func.dfg.attach_ebb_param(done, result); let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); + pos.func.dfg.clear_results(inst); + + // If we can tolerate native division traps, sdiv doesn't need branching. + if !avoid_div_traps && !is_srem { + let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); + pos.ins().with_result(result).x86_sdivmodx(x, xhi, y); + pos.remove_inst(); + return; + } + + // EBB handling the -1 divisor case. + let minus_one = pos.func.dfg.make_ebb(); + + // Final EBB with one argument representing the final result value. + let done = pos.func.dfg.make_ebb(); + + // Move the `inst` result value onto the `done` EBB. + pos.func.dfg.attach_ebb_param(done, result); // Start by checking for a -1 divisor which needs to be handled specially. - let is_m1 = pos.ins().icmp_imm(IntCC::Equal, y, -1); - pos.ins().brnz(is_m1, minus_one, &[]); + let is_m1 = pos.ins().ifcmp_imm(y, -1); + pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]); + + // Put in an explicit division-by-zero trap if the environment requires it. + if avoid_div_traps { + pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); + } // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division // by zero. let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); - let (_qout, rem) = pos.ins().x86_sdivmodx(x, xhi, y); - pos.ins().jump(done, &[rem]); + let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y); + let divres = if is_srem { rem } else { quot }; + pos.ins().jump(done, &[divres]); - // Now deal with the -1 divisor which always yields a 0 remainder. + // Now deal with the -1 divisor case. pos.insert_ebb(minus_one); - let zero = pos.ins().iconst(ty, 0); + let m1_result = if is_srem { + // x % -1 = 0. + pos.ins().iconst(ty, 0) + } else { + // Explicitly check for overflow: Trap when x == INT_MIN. + debug_assert!(avoid_div_traps, "Native trapping divide handled above"); + let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1)); + pos.ins().trapif( + IntCC::Equal, + f, + ir::TrapCode::IntegerOverflow, + ); + // x / -1 = -x. + pos.ins().irsub_imm(x, 0) + }; // Recycle the original instruction as a jump. - pos.func.dfg.replace(inst).jump(done, &[zero]); + pos.func.dfg.replace(inst).jump(done, &[m1_result]); // Finally insert a label for the completion. pos.next_inst(); @@ -73,6 +105,49 @@ fn expand_srem( cfg.recompute_ebb(pos.func, done); } +/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`. +fn expand_udivrem( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &isa::TargetIsa, +) { + + let (x, y, is_urem) = match func.dfg[inst] { + ir::InstructionData::Binary { + opcode: ir::Opcode::Udiv, + args, + } => (args[0], args[1], false), + ir::InstructionData::Binary { + opcode: ir::Opcode::Urem, + args, + } => (args[0], args[1], true), + _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)), + }; + let avoid_div_traps = isa.flags().avoid_div_traps(); + let result = func.dfg.first_result(inst); + let ty = func.dfg.value_type(result); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + pos.func.dfg.clear_results(inst); + + // Put in an explicit division-by-zero trap if the environment requires it. + if avoid_div_traps { + pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero); + } + + // Now it is safe to execute the `x86_udivmodx` instruction. + let xhi = pos.ins().iconst(ty, 0); + let reuse = if is_urem { + [None, Some(result)] + } else { + [Some(result), None] + }; + pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y); + pos.remove_inst(); +} + /// Expand the `fmin` and `fmax` instructions using the Intel `x86_fmin` and `x86_fmax` /// instructions. fn expand_minmax( diff --git a/lib/cretonne/src/settings.rs b/lib/cretonne/src/settings.rs index cc9f91f452..36a48e04b4 100644 --- a/lib/cretonne/src/settings.rs +++ b/lib/cretonne/src/settings.rs @@ -360,6 +360,7 @@ mod tests { is_64bit = false\n\ is_pic = false\n\ return_at_end = false\n\ + avoid_div_traps = false\n\ is_compressed = false\n\ enable_float = true\n\ enable_simd = true\n\