Add an avoid_div_traps setting.
This enables code generation that never causes a SIGFPE signal to be raised from a division instruction. Instead, division and remainder calculations are protected by explicit traps.
This commit is contained in:
71
cranelift/filetests/isa/intel/legalize-div-traps.cton
Normal file
71
cranelift/filetests/isa/intel/legalize-div-traps.cton
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
; Test the division legalizations.
|
||||||
|
test legalizer
|
||||||
|
set is_64bit
|
||||||
|
; See also legalize-div.cton.
|
||||||
|
set avoid_div_traps=1
|
||||||
|
isa intel
|
||||||
|
|
||||||
|
; regex: V=v\d+
|
||||||
|
; regex: EBB=ebb\d+
|
||||||
|
|
||||||
|
function %udiv(i64, i64) -> i64 {
|
||||||
|
ebb0(v0: i64, v1: i64):
|
||||||
|
; check: $ebb0(
|
||||||
|
v2 = udiv v0, v1
|
||||||
|
; nextln: $(fz=$V) = ifcmp_imm $v1, 0
|
||||||
|
; nextln: trapif eq $fz, int_divz
|
||||||
|
; nextln: $(hi=$V) = iconst.i64 0
|
||||||
|
; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
|
||||||
|
return v2
|
||||||
|
; nextln: return $d
|
||||||
|
}
|
||||||
|
|
||||||
|
function %urem(i64, i64) -> i64 {
|
||||||
|
ebb0(v0: i64, v1: i64):
|
||||||
|
; check: $ebb0(
|
||||||
|
v2 = urem v0, v1
|
||||||
|
; nextln: $(fz=$V) = ifcmp_imm $v1, 0
|
||||||
|
; nextln: trapif eq $fz, int_divz
|
||||||
|
; nextln: $(hi=$V) = iconst.i64 0
|
||||||
|
; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
|
||||||
|
return v2
|
||||||
|
; nextln: return $r
|
||||||
|
}
|
||||||
|
|
||||||
|
function %sdiv(i64, i64) -> i64 {
|
||||||
|
ebb0(v0: i64, v1: i64):
|
||||||
|
; check: $ebb0(
|
||||||
|
v2 = sdiv v0, v1
|
||||||
|
; nextln: $(fm1=$V) = ifcmp_imm $v1, -1
|
||||||
|
; nextln: brif eq $fm1, $(m1=$EBB)
|
||||||
|
; nextln: $(fz=$V) = ifcmp_imm $v1, 0
|
||||||
|
; nextln: trapif eq $fz, int_divz
|
||||||
|
; check: $(hi=$V) = sshr
|
||||||
|
; nextln: $(q=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
|
||||||
|
; nextln: jump $(done=$EBB)($q)
|
||||||
|
; check: $m1:
|
||||||
|
; nextln: $(fm=$V) = ifcmp_imm.i64 $v0, 0x8000_0000_0000_0000
|
||||||
|
; nextln: trapif eq $fm, int_ovf
|
||||||
|
; check: $done($v2: i64):
|
||||||
|
return v2
|
||||||
|
; nextln: return $v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1.
|
||||||
|
; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern.
|
||||||
|
function %srem(i64, i64) -> i64 {
|
||||||
|
ebb0(v0: i64, v1: i64):
|
||||||
|
; check: $ebb0(
|
||||||
|
v2 = srem v0, v1
|
||||||
|
; nextln: $(fm1=$V) = ifcmp_imm $v1, -1
|
||||||
|
; nextln: brif eq $fm1, $(m1=$EBB)
|
||||||
|
; check: $(hi=$V) = sshr
|
||||||
|
; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
|
||||||
|
; nextln: jump $(done=$EBB)($r)
|
||||||
|
; check: $m1:
|
||||||
|
; nextln: $(zero=$V) = iconst.i64 0
|
||||||
|
; nextln: jump $(done=$EBB)($zero)
|
||||||
|
; check: $done($v2: i64):
|
||||||
|
return v2
|
||||||
|
; nextln: return $v2
|
||||||
|
}
|
||||||
58
cranelift/filetests/isa/intel/legalize-div.cton
Normal file
58
cranelift/filetests/isa/intel/legalize-div.cton
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
; Test the division legalizations.
|
||||||
|
test legalizer
|
||||||
|
set is_64bit
|
||||||
|
; See also legalize-div-traps.cton.
|
||||||
|
set avoid_div_traps=0
|
||||||
|
isa intel
|
||||||
|
|
||||||
|
; regex: V=v\d+
|
||||||
|
; regex: EBB=ebb\d+
|
||||||
|
|
||||||
|
function %udiv(i64, i64) -> i64 {
|
||||||
|
ebb0(v0: i64, v1: i64):
|
||||||
|
; check: $ebb0(
|
||||||
|
v2 = udiv v0, v1
|
||||||
|
; nextln: $(hi=$V) = iconst.i64 0
|
||||||
|
; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
|
||||||
|
return v2
|
||||||
|
; nextln: return $d
|
||||||
|
}
|
||||||
|
|
||||||
|
function %urem(i64, i64) -> i64 {
|
||||||
|
ebb0(v0: i64, v1: i64):
|
||||||
|
; check: $ebb0(
|
||||||
|
v2 = urem v0, v1
|
||||||
|
; nextln: $(hi=$V) = iconst.i64 0
|
||||||
|
; nextln: $(d=$V), $(r=$V) = x86_udivmodx $v0, $hi, $v1
|
||||||
|
return v2
|
||||||
|
; nextln: return $r
|
||||||
|
}
|
||||||
|
|
||||||
|
function %sdiv(i64, i64) -> i64 {
|
||||||
|
ebb0(v0: i64, v1: i64):
|
||||||
|
; check: $ebb0(
|
||||||
|
v2 = sdiv v0, v1
|
||||||
|
; check: $(hi=$V) = sshr
|
||||||
|
; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
|
||||||
|
return v2
|
||||||
|
; nextln: return $d
|
||||||
|
}
|
||||||
|
|
||||||
|
; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1.
|
||||||
|
; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern.
|
||||||
|
function %srem(i64, i64) -> i64 {
|
||||||
|
ebb0(v0: i64, v1: i64):
|
||||||
|
; check: $ebb0(
|
||||||
|
v2 = srem v0, v1
|
||||||
|
; nextln: $(fm1=$V) = ifcmp_imm $v1, -1
|
||||||
|
; nextln: brif eq $fm1, $(m1=$EBB)
|
||||||
|
; check: $(hi=$V) = sshr
|
||||||
|
; nextln: $(d=$V), $(r=$V) = x86_sdivmodx $v0, $hi, $v1
|
||||||
|
; nextln: jump $(done=$EBB)($r)
|
||||||
|
; check: $m1:
|
||||||
|
; nextln: $(zero=$V) = iconst.i64 0
|
||||||
|
; nextln: jump $(done=$EBB)($zero)
|
||||||
|
; check: $done($v2: i64):
|
||||||
|
return v2
|
||||||
|
; nextln: return $v2
|
||||||
|
}
|
||||||
@@ -41,6 +41,18 @@ return_at_end = BoolSetting(
|
|||||||
instruction at the end.
|
instruction at the end.
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
avoid_div_traps = BoolSetting(
|
||||||
|
"""
|
||||||
|
Generate explicit checks around native division instructions to avoid
|
||||||
|
their trapping.
|
||||||
|
|
||||||
|
This is primarily used by SpiderMonkey which doesn't install a signal
|
||||||
|
handler for SIGFPE, but expects a SIGILL trap for division by zero.
|
||||||
|
|
||||||
|
On ISAs like ARM where the native division instructions don't trap,
|
||||||
|
this setting has no effect - explicit checks are always inserted.
|
||||||
|
""")
|
||||||
|
|
||||||
is_compressed = BoolSetting("Enable compressed instructions")
|
is_compressed = BoolSetting("Enable compressed instructions")
|
||||||
|
|
||||||
enable_float = BoolSetting(
|
enable_float = BoolSetting(
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ from __future__ import absolute_import
|
|||||||
from cdsl.ast import Var
|
from cdsl.ast import Var
|
||||||
from cdsl.xform import Rtl, XFormGroup
|
from cdsl.xform import Rtl, XFormGroup
|
||||||
from base.immediates import imm64, intcc, floatcc
|
from base.immediates import imm64, intcc, floatcc
|
||||||
from base.types import i32, i64
|
|
||||||
from base import legalize as shared
|
from base import legalize as shared
|
||||||
from base import instructions as insts
|
from base import instructions as insts
|
||||||
from . import instructions as x86
|
from . import instructions as x86
|
||||||
@@ -31,31 +30,12 @@ a2 = Var('a2')
|
|||||||
#
|
#
|
||||||
# Division and remainder.
|
# Division and remainder.
|
||||||
#
|
#
|
||||||
intel_expand.legalize(
|
|
||||||
a << insts.udiv(x, y),
|
|
||||||
Rtl(
|
|
||||||
xhi << insts.iconst(imm64(0)),
|
|
||||||
(a, dead) << x86.udivmodx(x, xhi, y)
|
|
||||||
))
|
|
||||||
|
|
||||||
intel_expand.legalize(
|
|
||||||
a << insts.urem(x, y),
|
|
||||||
Rtl(
|
|
||||||
xhi << insts.iconst(imm64(0)),
|
|
||||||
(dead, a) << x86.udivmodx(x, xhi, y)
|
|
||||||
))
|
|
||||||
|
|
||||||
for ty in [i32, i64]:
|
|
||||||
intel_expand.legalize(
|
|
||||||
a << insts.sdiv.bind(ty)(x, y),
|
|
||||||
Rtl(
|
|
||||||
xhi << insts.sshr_imm(x, imm64(ty.lane_bits() - 1)),
|
|
||||||
(a, dead) << x86.sdivmodx(x, xhi, y)
|
|
||||||
))
|
|
||||||
|
|
||||||
# The srem expansion requires custom code because srem INT_MIN, -1 is not
|
# The srem expansion requires custom code because srem INT_MIN, -1 is not
|
||||||
# allowed to trap.
|
# allowed to trap. The other ops need to check avoid_div_traps.
|
||||||
intel_expand.custom_legalize(insts.srem, 'expand_srem')
|
intel_expand.custom_legalize(insts.sdiv, 'expand_sdivrem')
|
||||||
|
intel_expand.custom_legalize(insts.srem, 'expand_sdivrem')
|
||||||
|
intel_expand.custom_legalize(insts.udiv, 'expand_udivrem')
|
||||||
|
intel_expand.custom_legalize(insts.urem, 'expand_udivrem')
|
||||||
|
|
||||||
# Floating point condition codes.
|
# Floating point condition codes.
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
//! Encoding tables for Intel ISAs.
|
//! Encoding tables for Intel ISAs.
|
||||||
|
|
||||||
use bitset::BitSet;
|
|
||||||
use cursor::{Cursor, FuncCursor};
|
use cursor::{Cursor, FuncCursor};
|
||||||
use flowgraph::ControlFlowGraph;
|
use flowgraph::ControlFlowGraph;
|
||||||
use ir::{self, InstBuilder};
|
use ir::{self, InstBuilder};
|
||||||
|
use ir::condcodes::IntCC;
|
||||||
use isa::constraints::*;
|
use isa::constraints::*;
|
||||||
use isa::enc_tables::*;
|
use isa::enc_tables::*;
|
||||||
use isa::encoding::RecipeSizing;
|
use isa::encoding::RecipeSizing;
|
||||||
@@ -14,55 +14,87 @@ use super::registers::*;
|
|||||||
include!(concat!(env!("OUT_DIR"), "/encoding-intel.rs"));
|
include!(concat!(env!("OUT_DIR"), "/encoding-intel.rs"));
|
||||||
include!(concat!(env!("OUT_DIR"), "/legalize-intel.rs"));
|
include!(concat!(env!("OUT_DIR"), "/legalize-intel.rs"));
|
||||||
|
|
||||||
/// Expand the `srem` instruction using `x86_sdivmodx`.
|
/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
|
||||||
fn expand_srem(
|
fn expand_sdivrem(
|
||||||
inst: ir::Inst,
|
inst: ir::Inst,
|
||||||
func: &mut ir::Function,
|
func: &mut ir::Function,
|
||||||
cfg: &mut ControlFlowGraph,
|
cfg: &mut ControlFlowGraph,
|
||||||
_isa: &isa::TargetIsa,
|
isa: &isa::TargetIsa,
|
||||||
) {
|
) {
|
||||||
use ir::condcodes::IntCC;
|
|
||||||
|
|
||||||
let (x, y) = match func.dfg[inst] {
|
let (x, y, is_srem) = match func.dfg[inst] {
|
||||||
|
ir::InstructionData::Binary {
|
||||||
|
opcode: ir::Opcode::Sdiv,
|
||||||
|
args,
|
||||||
|
} => (args[0], args[1], false),
|
||||||
ir::InstructionData::Binary {
|
ir::InstructionData::Binary {
|
||||||
opcode: ir::Opcode::Srem,
|
opcode: ir::Opcode::Srem,
|
||||||
args,
|
args,
|
||||||
} => (args[0], args[1]),
|
} => (args[0], args[1], true),
|
||||||
_ => panic!("Need srem: {}", func.dfg.display_inst(inst, None)),
|
_ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
|
||||||
};
|
};
|
||||||
|
let avoid_div_traps = isa.flags().avoid_div_traps();
|
||||||
let old_ebb = func.layout.pp_ebb(inst);
|
let old_ebb = func.layout.pp_ebb(inst);
|
||||||
|
|
||||||
// EBB handling the -1 divisor case.
|
|
||||||
let minus_one = func.dfg.make_ebb();
|
|
||||||
|
|
||||||
// Final EBB with one argument representing the final result value.
|
|
||||||
let done = func.dfg.make_ebb();
|
|
||||||
|
|
||||||
// Move the `inst` result value onto the `done` EBB.
|
|
||||||
let result = func.dfg.first_result(inst);
|
let result = func.dfg.first_result(inst);
|
||||||
let ty = func.dfg.value_type(result);
|
let ty = func.dfg.value_type(result);
|
||||||
func.dfg.clear_results(inst);
|
|
||||||
func.dfg.attach_ebb_param(done, result);
|
|
||||||
|
|
||||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||||
pos.use_srcloc(inst);
|
pos.use_srcloc(inst);
|
||||||
|
pos.func.dfg.clear_results(inst);
|
||||||
|
|
||||||
|
// If we can tolerate native division traps, sdiv doesn't need branching.
|
||||||
|
if !avoid_div_traps && !is_srem {
|
||||||
|
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
|
||||||
|
pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
|
||||||
|
pos.remove_inst();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// EBB handling the -1 divisor case.
|
||||||
|
let minus_one = pos.func.dfg.make_ebb();
|
||||||
|
|
||||||
|
// Final EBB with one argument representing the final result value.
|
||||||
|
let done = pos.func.dfg.make_ebb();
|
||||||
|
|
||||||
|
// Move the `inst` result value onto the `done` EBB.
|
||||||
|
pos.func.dfg.attach_ebb_param(done, result);
|
||||||
|
|
||||||
// Start by checking for a -1 divisor which needs to be handled specially.
|
// Start by checking for a -1 divisor which needs to be handled specially.
|
||||||
let is_m1 = pos.ins().icmp_imm(IntCC::Equal, y, -1);
|
let is_m1 = pos.ins().ifcmp_imm(y, -1);
|
||||||
pos.ins().brnz(is_m1, minus_one, &[]);
|
pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
|
||||||
|
|
||||||
|
// Put in an explicit division-by-zero trap if the environment requires it.
|
||||||
|
if avoid_div_traps {
|
||||||
|
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
|
||||||
|
}
|
||||||
|
|
||||||
// Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
|
// Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
|
||||||
// by zero.
|
// by zero unless `avoid_div_traps` caused the explicit zero-divisor trap above to be inserted.
|
||||||
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
|
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
|
||||||
let (_qout, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
|
let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
|
||||||
pos.ins().jump(done, &[rem]);
|
let divres = if is_srem { rem } else { quot };
|
||||||
|
pos.ins().jump(done, &[divres]);
|
||||||
|
|
||||||
// Now deal with the -1 divisor which always yields a 0 remainder.
|
// Now deal with the -1 divisor case.
|
||||||
pos.insert_ebb(minus_one);
|
pos.insert_ebb(minus_one);
|
||||||
let zero = pos.ins().iconst(ty, 0);
|
let m1_result = if is_srem {
|
||||||
|
// x % -1 = 0.
|
||||||
|
pos.ins().iconst(ty, 0)
|
||||||
|
} else {
|
||||||
|
// Explicitly check for overflow: Trap when x == INT_MIN.
|
||||||
|
debug_assert!(avoid_div_traps, "Native trapping divide handled above");
|
||||||
|
let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
|
||||||
|
pos.ins().trapif(
|
||||||
|
IntCC::Equal,
|
||||||
|
f,
|
||||||
|
ir::TrapCode::IntegerOverflow,
|
||||||
|
);
|
||||||
|
// x / -1 = -x.
|
||||||
|
pos.ins().irsub_imm(x, 0)
|
||||||
|
};
|
||||||
|
|
||||||
// Recycle the original instruction as a jump.
|
// Recycle the original instruction as a jump.
|
||||||
pos.func.dfg.replace(inst).jump(done, &[zero]);
|
pos.func.dfg.replace(inst).jump(done, &[m1_result]);
|
||||||
|
|
||||||
// Finally insert a label for the completion.
|
// Finally insert a label for the completion.
|
||||||
pos.next_inst();
|
pos.next_inst();
|
||||||
@@ -73,6 +105,49 @@ fn expand_srem(
|
|||||||
cfg.recompute_ebb(pos.func, done);
|
cfg.recompute_ebb(pos.func, done);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`.
|
||||||
|
fn expand_udivrem(
|
||||||
|
inst: ir::Inst,
|
||||||
|
func: &mut ir::Function,
|
||||||
|
_cfg: &mut ControlFlowGraph,
|
||||||
|
isa: &isa::TargetIsa,
|
||||||
|
) {
|
||||||
|
|
||||||
|
let (x, y, is_urem) = match func.dfg[inst] {
|
||||||
|
ir::InstructionData::Binary {
|
||||||
|
opcode: ir::Opcode::Udiv,
|
||||||
|
args,
|
||||||
|
} => (args[0], args[1], false),
|
||||||
|
ir::InstructionData::Binary {
|
||||||
|
opcode: ir::Opcode::Urem,
|
||||||
|
args,
|
||||||
|
} => (args[0], args[1], true),
|
||||||
|
_ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
|
||||||
|
};
|
||||||
|
let avoid_div_traps = isa.flags().avoid_div_traps();
|
||||||
|
let result = func.dfg.first_result(inst);
|
||||||
|
let ty = func.dfg.value_type(result);
|
||||||
|
|
||||||
|
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||||
|
pos.use_srcloc(inst);
|
||||||
|
pos.func.dfg.clear_results(inst);
|
||||||
|
|
||||||
|
// Put in an explicit division-by-zero trap if the environment requires it.
|
||||||
|
if avoid_div_traps {
|
||||||
|
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now execute the `x86_udivmodx` instruction. It is only guarded against a zero divisor when `avoid_div_traps` inserted the check above.
|
||||||
|
let xhi = pos.ins().iconst(ty, 0);
|
||||||
|
let reuse = if is_urem {
|
||||||
|
[None, Some(result)]
|
||||||
|
} else {
|
||||||
|
[Some(result), None]
|
||||||
|
};
|
||||||
|
pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
|
||||||
|
pos.remove_inst();
|
||||||
|
}
|
||||||
|
|
||||||
/// Expand the `fmin` and `fmax` instructions using the Intel `x86_fmin` and `x86_fmax`
|
/// Expand the `fmin` and `fmax` instructions using the Intel `x86_fmin` and `x86_fmax`
|
||||||
/// instructions.
|
/// instructions.
|
||||||
fn expand_minmax(
|
fn expand_minmax(
|
||||||
|
|||||||
@@ -360,6 +360,7 @@ mod tests {
|
|||||||
is_64bit = false\n\
|
is_64bit = false\n\
|
||||||
is_pic = false\n\
|
is_pic = false\n\
|
||||||
return_at_end = false\n\
|
return_at_end = false\n\
|
||||||
|
avoid_div_traps = false\n\
|
||||||
is_compressed = false\n\
|
is_compressed = false\n\
|
||||||
enable_float = true\n\
|
enable_float = true\n\
|
||||||
enable_simd = true\n\
|
enable_simd = true\n\
|
||||||
|
|||||||
Reference in New Issue
Block a user