Add an avoid_div_traps setting.

This enables code generation that never causes a SIGFPE signal to be raised from a division instruction. Instead, division and remainder calculations are protected by explicit traps.
2018-02-16 11:23:37 -08:00
parent ed24320eda
commit a9e799debb
6 changed files with 247 additions and 50 deletions
--- a/cranelift/filetests/isa/intel/legalize-div-traps.cton
+++ b/cranelift/filetests/isa/intel/legalize-div-traps.cton
@@ -0,0 +1,71 @@
+; Test the division legalizations with avoid_div_traps enabled (explicit trap checks).
+test legalizer
+set is_64bit
+; See also legalize-div.cton.
+set avoid_div_traps=1
+isa intel
+
+; regex: V=v\d+
+; regex: EBB=ebb\d+
+
+function %udiv(i64, i64) -> i64 {
+ebb0(v0: i64, v1: i64):
+    ; check: $ebb0(
+    v2 = udiv v0, v1
+    ; nextln: $(fz=$V) = ifcmp_imm $v1, 0
+    ; nextln: trapif eq $fz, int_divz
+    ; nextln: $(hi=$V) = iconst.i64 0
+    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
+    return v2
+    ; nextln: return $d
+}
+
+function %urem(i64, i64) -> i64 {
+ebb0(v0: i64, v1: i64):
+    ; check: $ebb0(
+    v2 = urem v0, v1
+    ; nextln: $(fz=$V) = ifcmp_imm $v1, 0
+    ; nextln: trapif eq $fz, int_divz
+    ; nextln: $(hi=$V) = iconst.i64 0
+    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
+    return v2
+    ; nextln: return $r
+}
+
+function %sdiv(i64, i64) -> i64 {
+ebb0(v0: i64, v1: i64):
+    ; check: $ebb0(
+    v2 = sdiv v0, v1
+    ; nextln: $(fm1=$V) = ifcmp_imm $v1, -1
+    ; nextln: brif eq $fm1, $(m1=$EBB)
+    ; nextln: $(fz=$V) = ifcmp_imm $v1, 0
+    ; nextln: trapif eq $fz, int_divz
+    ; check: $(hi=$V) = sshr
+    ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
+    ; nextln: jump $(done=$EBB)($q)
+    ; check: $m1:
+    ; nextln: $(fm=$V) = ifcmp_imm.i64 $v0, 0x8000_0000_0000_0000
+    ; nextln: trapif eq $fm, int_ovf
+    ; check: $done($v2: i64):
+    return v2
+    ; nextln: return $v2
+}
+
+; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1.
+; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern.
+function %srem(i64, i64) -> i64 {
+ebb0(v0: i64, v1: i64):
+    ; check: $ebb0(
+    v2 = srem v0, v1
+    ; nextln: $(fm1=$V) = ifcmp_imm $v1, -1
+    ; nextln: brif eq $fm1, $(m1=$EBB)
+    ; check: $(hi=$V) = sshr
+    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
+    ; nextln: jump $(done=$EBB)($r)
+    ; check: $m1:
+    ; nextln: $(zero=$V) = iconst.i64 0
+    ; nextln: jump $(done=$EBB)($zero)
+    ; check: $done($v2: i64):
+    return v2
+    ; nextln: return $v2
+}
--- a/cranelift/filetests/isa/intel/legalize-div.cton
+++ b/cranelift/filetests/isa/intel/legalize-div.cton
@@ -0,0 +1,58 @@
+; Test the division legalizations with avoid_div_traps disabled (native division traps).
+test legalizer
+set is_64bit
+; See also legalize-div-traps.cton.
+set avoid_div_traps=0
+isa intel
+
+; regex: V=v\d+
+; regex: EBB=ebb\d+
+
+function %udiv(i64, i64) -> i64 {
+ebb0(v0: i64, v1: i64):
+    ; check: $ebb0(
+    v2 = udiv v0, v1
+    ; nextln: $(hi=$V) = iconst.i64 0
+    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
+    return v2
+    ; nextln: return $d
+}
+
+function %urem(i64, i64) -> i64 {
+ebb0(v0: i64, v1: i64):
+    ; check: $ebb0(
+    v2 = urem v0, v1
+    ; nextln: $(hi=$V) = iconst.i64 0
+    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
+    return v2
+    ; nextln: return $r
+}
+
+function %sdiv(i64, i64) -> i64 {
+ebb0(v0: i64, v1: i64):
+    ; check: $ebb0(
+    v2 = sdiv v0, v1
+    ; check: $(hi=$V) = sshr
+    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
+    return v2
+    ; nextln: return $d
+}
+
+; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1.
+; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern.
+function %srem(i64, i64) -> i64 {
+ebb0(v0: i64, v1: i64):
+    ; check: $ebb0(
+    v2 = srem v0, v1
+    ; nextln: $(fm1=$V) = ifcmp_imm $v1, -1
+    ; nextln: brif eq $fm1, $(m1=$EBB)
+    ; check: $(hi=$V) = sshr
+    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
+    ; nextln: jump $(done=$EBB)($r)
+    ; check: $m1:
+    ; nextln: $(zero=$V) = iconst.i64 0
+    ; nextln: jump $(done=$EBB)($zero)
+    ; check: $done($v2: i64):
+    return v2
+    ; nextln: return $v2
+}
--- a/lib/cretonne/meta/base/settings.py
+++ b/lib/cretonne/meta/base/settings.py
@@ -41,6 +41,18 @@ return_at_end = BoolSetting(
        instruction at the end.
        """)

+avoid_div_traps = BoolSetting(
+        """
+        Generate explicit checks around native division instructions to avoid
+        their trapping.
+
+        This is primarily used by SpiderMonkey which doesn't install a signal
+        handler for SIGFPE, but expects a SIGILL trap for division by zero.
+
+        On ISAs like ARM where the native division instructions don't trap,
+        this setting has no effect - explicit checks are always inserted.
+        """)
+
 is_compressed = BoolSetting("Enable compressed instructions")

 enable_float = BoolSetting(
--- a/lib/cretonne/meta/isa/intel/legalize.py
+++ b/lib/cretonne/meta/isa/intel/legalize.py
@@ -5,7 +5,6 @@ from __future__ import absolute_import
 from cdsl.ast import Var
 from cdsl.xform import Rtl, XFormGroup
 from base.immediates import imm64, intcc, floatcc
-from base.types import i32, i64
 from base import legalize as shared
 from base import instructions as insts
 from . import instructions as x86
@@ -31,31 +30,12 @@ a2 = Var('a2')
 #
 # Division and remainder.
 #
-intel_expand.legalize(
-        a << insts.udiv(x, y),
-        Rtl(
-            xhi << insts.iconst(imm64(0)),
-            (a, dead) << x86.udivmodx(x, xhi, y)
-        ))
-
-intel_expand.legalize(
-        a << insts.urem(x, y),
-        Rtl(
-            xhi << insts.iconst(imm64(0)),
-            (dead, a) << x86.udivmodx(x, xhi, y)
-        ))
-
-for ty in [i32, i64]:
-    intel_expand.legalize(
-            a << insts.sdiv.bind(ty)(x, y),
-            Rtl(
-                xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)),
-                (a, dead) << x86.sdivmodx(x, xhi, y)
-            ))
-
 # The srem expansion requires custom code because srem INT_MIN, -1 is not
-# allowed to trap.
-intel_expand.custom_legalize(insts.srem, 'expand_srem')
+# allowed to trap. The other ops need to check avoid_div_traps.
+intel_expand.custom_legalize(insts.sdiv, 'expand_sdivrem')
+intel_expand.custom_legalize(insts.srem, 'expand_sdivrem')
+intel_expand.custom_legalize(insts.udiv, 'expand_udivrem')
+intel_expand.custom_legalize(insts.urem, 'expand_udivrem')

 # Floating point condition codes.
 #
--- a/lib/cretonne/src/isa/intel/enc_tables.rs
+++ b/lib/cretonne/src/isa/intel/enc_tables.rs
@@ -1,9 +1,9 @@
 //! Encoding tables for Intel ISAs.

-use bitset::BitSet;
 use cursor::{Cursor, FuncCursor};
 use flowgraph::ControlFlowGraph;
 use ir::{self, InstBuilder};
+use ir::condcodes::IntCC;
 use isa::constraints::*;
 use isa::enc_tables::*;
 use isa::encoding::RecipeSizing;
@@ -14,55 +14,87 @@ use super::registers::*;
 include!(concat!(env!("OUT_DIR"), "/encoding-intel.rs"));
 include!(concat!(env!("OUT_DIR"), "/legalize-intel.rs"));

-/// Expand the `srem` instruction using `x86_sdivmodx`.
-fn expand_srem(
+/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
+fn expand_sdivrem(
    inst: ir::Inst,
    func: &mut ir::Function,
    cfg: &mut ControlFlowGraph,
-    _isa: &isa::TargetIsa,
+    isa: &isa::TargetIsa,
 ) {
-    use ir::condcodes::IntCC;

-    let (x, y) = match func.dfg[inst] {
+    let (x, y, is_srem) = match func.dfg[inst] {
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Sdiv,
+            args,
+        } => (args[0], args[1], false),
        ir::InstructionData::Binary {
            opcode: ir::Opcode::Srem,
            args,
-        } => (args[0], args[1]),
-        _ => panic!("Need srem: {}", func.dfg.display_inst(inst, None)),
+        } => (args[0], args[1], true),
+        _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
    };
+    let avoid_div_traps = isa.flags().avoid_div_traps();
    let old_ebb = func.layout.pp_ebb(inst);
-
-    // EBB handling the -1 divisor case.
-    let minus_one = func.dfg.make_ebb();
-
-    // Final EBB with one argument representing the final result value.
-    let done = func.dfg.make_ebb();
-
-    // Move the `inst` result value onto the `done` EBB.
    let result = func.dfg.first_result(inst);
    let ty = func.dfg.value_type(result);
-    func.dfg.clear_results(inst);
-    func.dfg.attach_ebb_param(done, result);

    let mut pos = FuncCursor::new(func).at_inst(inst);
    pos.use_srcloc(inst);
+    pos.func.dfg.clear_results(inst);
+
+    // If we can tolerate native division traps, sdiv doesn't need branching.
+    if !avoid_div_traps && !is_srem {
+        let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
+        pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
+        pos.remove_inst();
+        return;
+    }
+
+    // EBB handling the -1 divisor case.
+    let minus_one = pos.func.dfg.make_ebb();
+
+    // Final EBB with one argument representing the final result value.
+    let done = pos.func.dfg.make_ebb();
+
+    // Move the `inst` result value onto the `done` EBB.
+    pos.func.dfg.attach_ebb_param(done, result);

    // Start by checking for a -1 divisor which needs to be handled specially.
-    let is_m1 = pos.ins().icmp_imm(IntCC::Equal, y, -1);
-    pos.ins().brnz(is_m1, minus_one, &[]);
+    let is_m1 = pos.ins().ifcmp_imm(y, -1);
+    pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
+
+    // Put in an explicit division-by-zero trap if the environment requires it.
+    if avoid_div_traps {
+        pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
+    }

    // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
    // by zero.
    let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
-    let (_qout, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
-    pos.ins().jump(done, &[rem]);
+    let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
+    let divres = if is_srem { rem } else { quot };
+    pos.ins().jump(done, &[divres]);

-    // Now deal with the -1 divisor which always yields a 0 remainder.
+    // Now deal with the -1 divisor case.
    pos.insert_ebb(minus_one);
-    let zero = pos.ins().iconst(ty, 0);
+    let m1_result = if is_srem {
+        // x % -1 = 0.
+        pos.ins().iconst(ty, 0)
+    } else {
+        // Explicitly check for overflow: Trap when x == INT_MIN.
+        debug_assert!(avoid_div_traps, "Native trapping divide handled above");
+        let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
+        pos.ins().trapif(
+            IntCC::Equal,
+            f,
+            ir::TrapCode::IntegerOverflow,
+        );
+        // x / -1 = -x.
+        pos.ins().irsub_imm(x, 0)
+    };

    // Recycle the original instruction as a jump.
-    pos.func.dfg.replace(inst).jump(done, &[zero]);
+    pos.func.dfg.replace(inst).jump(done, &[m1_result]);

    // Finally insert a label for the completion.
    pos.next_inst();
@@ -73,6 +105,49 @@ fn expand_srem(
    cfg.recompute_ebb(pos.func, done);
 }

+/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`.
+fn expand_udivrem(
+    inst: ir::Inst,
+    func: &mut ir::Function,
+    _cfg: &mut ControlFlowGraph,
+    isa: &isa::TargetIsa,
+) {
+
+    let (x, y, is_urem) = match func.dfg[inst] {
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Udiv,
+            args,
+        } => (args[0], args[1], false),
+        ir::InstructionData::Binary {
+            opcode: ir::Opcode::Urem,
+            args,
+        } => (args[0], args[1], true),
+        _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
+    };
+    let avoid_div_traps = isa.flags().avoid_div_traps();
+    let result = func.dfg.first_result(inst);
+    let ty = func.dfg.value_type(result);
+
+    let mut pos = FuncCursor::new(func).at_inst(inst);
+    pos.use_srcloc(inst);
+    pos.func.dfg.clear_results(inst);
+
+    // Put in an explicit division-by-zero trap if the environment requires it.
+    if avoid_div_traps {
+        pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
+    }
+
+    // Now it is safe to execute the `x86_udivmodx` instruction.
+    let xhi = pos.ins().iconst(ty, 0);
+    let reuse = if is_urem {
+        [None, Some(result)]
+    } else {
+        [Some(result), None]
+    };
+    pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
+    pos.remove_inst();
+}
+
 /// Expand the `fmin` and `fmax` instructions using the Intel `x86_fmin` and `x86_fmax`
 /// instructions.
 fn expand_minmax(
--- a/lib/cretonne/src/settings.rs
+++ b/lib/cretonne/src/settings.rs
@@ -360,6 +360,7 @@ mod tests {
                    is_64bit = false\n\
                    is_pic = false\n\
                    return_at_end = false\n\
+                    avoid_div_traps = false\n\
                    is_compressed = false\n\
                    enable_float = true\n\
                    enable_simd = true\n\