Add an avoid_div_traps setting.

This enables code generation that never causes a SIGFPE signal to be raised from a division instruction. Instead, division and remainder calculations are protected by explicit traps.
2018-02-16 11:23:37 -08:00
parent ed24320eda
commit a9e799debb
6 changed files with 247 additions and 50 deletions
--- a/cranelift/filetests/isa/intel/legalize-div-traps.cton
+++ b/cranelift/filetests/isa/intel/legalize-div-traps.cton
@@ -0,0 +1,71 @@
 ; Test the division legalizations when native division traps must be avoided.
 test legalizer
 set is_64bit
 ; See also legalize-div.cton.
 set avoid_div_traps=1
 isa intel
 ; regex: V=v\d+
 ; regex: EBB=ebb\d+
 ; Unsigned division with avoid_div_traps=1. The divisor is compared against zero and int_divz is
 ; raised explicitly before x86_udivmodx runs with a zero high half. The quotient is returned.
 function %udiv(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: $ebb0(
    v2 = udiv v0, v1
    ; nextln: $(fz=$V) = ifcmp_imm $v1, 0
    ; nextln: trapif eq $fz, int_divz
    ; nextln: $(hi=$V) = iconst.i64 0
    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
    return v2
    ; nextln: return $d
 }
 ; Unsigned remainder with avoid_div_traps=1. The divisor is compared against zero and int_divz is
 ; raised explicitly before x86_udivmodx runs. The remainder output is the one returned.
 function %urem(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: $ebb0(
    v2 = urem v0, v1
    ; nextln: $(fz=$V) = ifcmp_imm $v1, 0
    ; nextln: trapif eq $fz, int_divz
    ; nextln: $(hi=$V) = iconst.i64 0
    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
    return v2
    ; nextln: return $r
 }
 ; Signed division with avoid_div_traps=1. A divisor of -1 branches to a separate EBB where the
 ; dividend is compared against INT_MIN and int_ovf is raised. Otherwise the divisor is compared
 ; against zero (int_divz) before x86_sdivmodx runs, and the quotient is passed to a final EBB
 ; whose parameter is the original result value.
 function %sdiv(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: $ebb0(
    v2 = sdiv v0, v1
    ; nextln: $(fm1=$V) = ifcmp_imm $v1, -1
    ; nextln: brif eq $fm1, $(m1=$EBB)
    ; nextln: $(fz=$V) = ifcmp_imm $v1, 0
    ; nextln: trapif eq $fz, int_divz
    ; check: $(hi=$V) = sshr
    ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
    ; nextln: jump $(done=$EBB)($q)
    ; check: $m1:
    ; nextln: $(fm=$V) = ifcmp_imm.i64 $v0, 0x8000_0000_0000_0000
    ; nextln: trapif eq $fm, int_ovf
    ; check: $done($v2: i64):
    return v2
    ; nextln: return $v2
 }
 ; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1.
 ; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern.
 ; Signed remainder with avoid_div_traps=1. A divisor of -1 branches to an EBB that yields a zero
 ; constant. Otherwise the remainder output of x86_sdivmodx is passed to the final EBB. The sshr
 ; line is matched loosely, so the zero-divisor guard emitted in this mode is skipped over rather
 ; than verified by this function.
 function %srem(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: $ebb0(
    v2 = srem v0, v1
    ; nextln: $(fm1=$V) = ifcmp_imm $v1, -1
    ; nextln: brif eq $fm1, $(m1=$EBB)
    ; check: $(hi=$V) = sshr
    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
    ; nextln: jump $(done=$EBB)($r)
    ; check: $m1:
    ; nextln: $(zero=$V) = iconst.i64 0
    ; nextln: jump $(done=$EBB)($zero)
    ; check: $done($v2: i64):
    return v2
    ; nextln: return $v2
 }
--- a/cranelift/filetests/isa/intel/legalize-div.cton
+++ b/cranelift/filetests/isa/intel/legalize-div.cton
@@ -0,0 +1,58 @@
 ; Test the division legalizations when native division traps are tolerated.
 test legalizer
 set is_64bit
 ; See also legalize-div-traps.cton.
 set avoid_div_traps=0
 isa intel
 ; regex: V=v\d+
 ; regex: EBB=ebb\d+
 ; Unsigned division with avoid_div_traps=0. No explicit guard is expected directly after the
 ; block header, only x86_udivmodx with a zero high half. The quotient is returned.
 function %udiv(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: $ebb0(
    v2 = udiv v0, v1
    ; nextln: $(hi=$V) = iconst.i64 0
    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
    return v2
    ; nextln: return $d
 }
 ; Unsigned remainder with avoid_div_traps=0. No explicit guard is expected directly after the
 ; block header, only x86_udivmodx with a zero high half. The remainder is returned.
 function %urem(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: $ebb0(
    v2 = urem v0, v1
    ; nextln: $(hi=$V) = iconst.i64 0
    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
    return v2
    ; nextln: return $r
 }
 ; Signed division with avoid_div_traps=0. x86_sdivmodx is fed a high half produced by sshr and
 ; its quotient is returned on the line right after the division.
 function %sdiv(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: $ebb0(
    v2 = sdiv v0, v1
    ; check: $(hi=$V) = sshr
    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
    return v2
    ; nextln: return $d
 }
 ; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1.
 ; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern.
 ; Signed remainder with avoid_div_traps=0. The -1 divisor is still branched to its own EBB, which
 ; yields a zero constant. Every other divisor goes through x86_sdivmodx, whose remainder output
 ; is passed to the final EBB.
 function %srem(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    ; check: $ebb0(
    v2 = srem v0, v1
    ; nextln: $(fm1=$V) = ifcmp_imm $v1, -1
    ; nextln: brif eq $fm1, $(m1=$EBB)
    ; check: $(hi=$V) = sshr
    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
    ; nextln: jump $(done=$EBB)($r)
    ; check: $m1:
    ; nextln: $(zero=$V) = iconst.i64 0
    ; nextln: jump $(done=$EBB)($zero)
    ; check: $done($v2: i64):
    return v2
    ; nextln: return $v2
 }
--- a/lib/cretonne/meta/base/settings.py
+++ b/lib/cretonne/meta/base/settings.py
@@ -41,6 +41,18 @@ return_at_end = BoolSetting(
        instruction at the end.
        """)
 # Read on the Rust side as `isa.flags().avoid_div_traps()` by the Intel
 # division expansions (`expand_sdivrem` and `expand_udivrem`).
 avoid_div_traps = BoolSetting(
        """
        Generate explicit checks around native division instructions to avoid
        their trapping.
        This is primarily used by SpiderMonkey which doesn't install a signal
        handler for SIGFPE, but expects a SIGILL trap for division by zero.
        On ISAs like ARM where the native division instructions don't trap,
        this setting has no effect - explicit checks are always inserted.
        """)
 is_compressed = BoolSetting("Enable compressed instructions")
 enable_float = BoolSetting(
--- a/lib/cretonne/meta/isa/intel/legalize.py
+++ b/lib/cretonne/meta/isa/intel/legalize.py
@@ -5,7 +5,6 @@ from __future__ import absolute_import
 from cdsl.ast import Var
 from cdsl.xform import Rtl, XFormGroup
 from base.immediates import imm64, intcc, floatcc
 from base.types import i32, i64
 from base import legalize as shared
 from base import instructions as insts
 from . import instructions as x86
@@ -31,31 +30,12 @@ a2 = Var('a2')
 #
 # Division and remainder.
 #
 intel_expand.legalize(
        a << insts.udiv(x, y),
        Rtl(
            xhi << insts.iconst(imm64(0)),
            (a, dead) << x86.udivmodx(x, xhi, y)
        ))
 intel_expand.legalize(
        a << insts.urem(x, y),
        Rtl(
            xhi << insts.iconst(imm64(0)),
            (dead, a) << x86.udivmodx(x, xhi, y)
        ))
 for ty in [i32, i64]:
    intel_expand.legalize(
            a << insts.sdiv.bind(ty)(x, y),
            Rtl(
                xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)),
                (a, dead) << x86.sdivmodx(x, xhi, y)
            ))
 # The srem expansion requires custom code because srem INT_MIN, -1 is not
-# allowed to trap.
+# allowed to trap. The other ops need to check avoid_div_traps.
-intel_expand.custom_legalize(insts.srem, 'expand_srem')
+intel_expand.custom_legalize(insts.sdiv, 'expand_sdivrem')
 intel_expand.custom_legalize(insts.srem, 'expand_sdivrem')
 intel_expand.custom_legalize(insts.udiv, 'expand_udivrem')
 intel_expand.custom_legalize(insts.urem, 'expand_udivrem')
 # Floating point condition codes.
 #
--- a/lib/cretonne/src/isa/intel/enc_tables.rs
+++ b/lib/cretonne/src/isa/intel/enc_tables.rs
@@ -1,9 +1,9 @@
 //! Encoding tables for Intel ISAs.
 use bitset::BitSet;
 use cursor::{Cursor, FuncCursor};
 use flowgraph::ControlFlowGraph;
 use ir::{self, InstBuilder};
 use ir::condcodes::IntCC;
 use isa::constraints::*;
 use isa::enc_tables::*;
 use isa::encoding::RecipeSizing;
@@ -14,55 +14,87 @@ use super::registers::*;
 include!(concat!(env!("OUT_DIR"), "/encoding-intel.rs"));
 include!(concat!(env!("OUT_DIR"), "/legalize-intel.rs"));
-/// Expand the `srem` instruction using `x86_sdivmodx`.
+/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
-fn expand_srem(
+fn expand_sdivrem(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
-    _isa: &isa::TargetIsa,
+    isa: &isa::TargetIsa,
 ) {
    use ir::condcodes::IntCC;
-    let (x, y) = match func.dfg[inst] {
+    let (x, y, is_srem) = match func.dfg[inst] {
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Sdiv,
            args,
        } => (args[0], args[1], false),
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Srem,
            args,
-        } => (args[0], args[1]),
+        } => (args[0], args[1], true),
-        _ => panic!("Need srem: {}", func.dfg.display_inst(inst, None)),
+        _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
    };
    let avoid_div_traps = isa.flags().avoid_div_traps();
    let old_ebb = func.layout.pp_ebb(inst);
    // EBB handling the -1 divisor case.
    let minus_one = func.dfg.make_ebb();
    // Final EBB with one argument representing the final result value.
    let done = func.dfg.make_ebb();
    // Move the `inst` result value onto the `done` EBB.
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);
    func.dfg.clear_results(inst);
    func.dfg.attach_ebb_param(done, result);
    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);
    pos.func.dfg.clear_results(inst);
    // If we can tolerate native division traps, sdiv doesn't need branching.
    if !avoid_div_traps && !is_srem {
        let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
        pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
        pos.remove_inst();
        return;
    }
    // EBB handling the -1 divisor case.
    let minus_one = pos.func.dfg.make_ebb();
    // Final EBB with one argument representing the final result value.
    let done = pos.func.dfg.make_ebb();
    // Move the `inst` result value onto the `done` EBB.
    pos.func.dfg.attach_ebb_param(done, result);
    // Start by checking for a -1 divisor which needs to be handled specially.
-    let is_m1 = pos.ins().icmp_imm(IntCC::Equal, y, -1);
+    let is_m1 = pos.ins().ifcmp_imm(y, -1);
-    pos.ins().brnz(is_m1, minus_one, &[]);
+    pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
    // Put in an explicit division-by-zero trap if the environment requires it.
    if avoid_div_traps {
        pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
    }
    // Now it is safe to execute the `x86_sdivmodx` instruction. It will still trap on division by
    // zero unless the explicit check above was inserted.
    let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
-    let (_qout, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
+    let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
-    pos.ins().jump(done, &[rem]);
+    let divres = if is_srem { rem } else { quot };
    pos.ins().jump(done, &[divres]);
-    // Now deal with the -1 divisor which always yields a 0 remainder.
+    // Now deal with the -1 divisor case.
    pos.insert_ebb(minus_one);
-    let zero = pos.ins().iconst(ty, 0);
+    let m1_result = if is_srem {
        // x % -1 = 0.
        pos.ins().iconst(ty, 0)
    } else {
        // Explicitly check for overflow: Trap when x == INT_MIN.
        debug_assert!(avoid_div_traps, "Native trapping divide handled above");
        let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
        pos.ins().trapif(
            IntCC::Equal,
            f,
            ir::TrapCode::IntegerOverflow,
        );
        // x / -1 = -x.
        pos.ins().irsub_imm(x, 0)
    };
    // Recycle the original instruction as a jump.
-    pos.func.dfg.replace(inst).jump(done, &[zero]);
+    pos.func.dfg.replace(inst).jump(done, &[m1_result]);
    // Finally insert a label for the completion.
    pos.next_inst();
@@ -73,6 +105,49 @@ fn expand_srem(
    cfg.recompute_ebb(pos.func, done);
 }
 /// Legalize `udiv` and `urem` by lowering them to the Intel `x86_udivmodx` instruction.
 ///
 /// `x86_udivmodx` produces two results; the value originally defined by `inst` is re-attached to
 /// the quotient for `udiv` and to the remainder for `urem`. If the `avoid_div_traps` flag is
 /// set, a `trapz` on the divisor is inserted ahead of the division.
 ///
 /// Panics if `inst` is neither `udiv` nor `urem`.
 fn expand_udivrem(
    inst: ir::Inst,
    func: &mut ir::Function,
    _cfg: &mut ControlFlowGraph,
    isa: &isa::TargetIsa,
 ) {
    let (dividend, divisor, want_remainder) = match func.dfg[inst] {
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Urem,
            args,
        } => (args[0], args[1], true),
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Udiv,
            args,
        } => (args[0], args[1], false),
        _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
    };
    let old_result = func.dfg.first_result(inst);
    let int_ty = func.dfg.value_type(old_result);
    let explicit_zero_trap = isa.flags().avoid_div_traps();
    let mut cursor = FuncCursor::new(func).at_inst(inst);
    cursor.use_srcloc(inst);
    // Detach the result from `inst` so it can be handed to the replacement instruction below.
    cursor.func.dfg.clear_results(inst);
    // Guard against a zero divisor ourselves when native division traps must be avoided.
    if explicit_zero_trap {
        cursor
            .ins()
            .trapz(divisor, ir::TrapCode::IntegerDivisionByZero);
    }
    // The high half of the dividend is always zero for an unsigned division.
    let high_half = cursor.ins().iconst(int_ty, 0);
    // Result slots are ordered (quotient, remainder); reuse the old value in the wanted one.
    let (quot_slot, rem_slot) = if want_remainder {
        (None, Some(old_result))
    } else {
        (Some(old_result), None)
    };
    cursor
        .ins()
        .with_results([quot_slot, rem_slot])
        .x86_udivmodx(dividend, high_half, divisor);
    // The original `udiv`/`urem` is now dead.
    cursor.remove_inst();
 }
 /// Expand the `fmin` and `fmax` instructions using the Intel `x86_fmin` and `x86_fmax`
 /// instructions.
 fn expand_minmax(
--- a/lib/cretonne/src/settings.rs
+++ b/lib/cretonne/src/settings.rs
@@ -360,6 +360,7 @@ mod tests {
                    is_64bit = false\n\
                    is_pic = false\n\
                    return_at_end = false\n\
                    avoid_div_traps = false\n\
                    is_compressed = false\n\
                    enable_float = true\n\
                    enable_simd = true\n\