cranelift: Implement nan canonicalization for vectors (#3146)

This fixes some fuzz bugs that came about from enabling SIMD: the fuzzers
perform NaN canonicalization, but Cranelift would panic on the canonicalized
ops when they operate on vectors. This adds some custom codegen with
`bitselect` to ensure that any NaN lanes produced by the canonicalized
operations are canonical-NaN lanes.
This commit is contained in:
Alex Crichton
2021-08-05 13:44:16 -05:00
committed by GitHub
parent 9e142f8792
commit c6b095f9a3
3 changed files with 95 additions and 12 deletions

View File

@@ -6,7 +6,6 @@ use crate::cursor::{Cursor, FuncCursor};
use crate::ir::condcodes::FloatCC;
use crate::ir::immediates::{Ieee32, Ieee64};
use crate::ir::types;
use crate::ir::types::Type;
use crate::ir::{Function, Inst, InstBuilder, InstructionData, Opcode, Value};
use crate::timing;
@@ -64,22 +63,44 @@ fn add_nan_canon_seq(pos: &mut FuncCursor, inst: Inst) {
// Insert a comparison instruction, to check if `inst_res` is NaN. Select
// the canonical NaN value if `val` is NaN, assign the result to `inst`.
let is_nan = pos.ins().fcmp(FloatCC::NotEqual, new_res, new_res);
let canon_nan = insert_nan_const(pos, val_type);
// Scalar path: emit a `select` keyed on `is_nan` that chooses between
// `canon_nan` and the original result `new_res`, and bind the outcome to the
// pre-existing value `val` via `with_result(val)`.
// `canon_nan` is passed in by the caller; `is_nan`, `val` and `new_res` are
// captured from the enclosing function.
let scalar_select = |pos: &mut FuncCursor, canon_nan: Value| {
pos.ins()
.with_result(val)
.select(is_nan, canon_nan, new_res);
};
// Vector path: pick between `canon_nan` and `new_res` with `bitselect`,
// using the vector comparison result `is_nan` as the condition operand.
//
// The callers in this function pass `canon_nan` as an integer-vector splat,
// while `new_res` is a float vector, so the three operands do not share a
// type. Each is reinterpreted as `I8X16` with `raw_bitcast` before the
// `bitselect` (presumably so that all operands have one common type), and the
// selected bits are then cast back to `val_type` and bound to `val`.
// NOTE(review): this relies on `is_nan` being a per-lane mask for the vector
// `fcmp` -- confirm against the Cranelift IR definition of vector compares.
let vector_select = |pos: &mut FuncCursor, canon_nan: Value| {
let cond = pos.ins().raw_bitcast(types::I8X16, is_nan);
let canon_nan = pos.ins().raw_bitcast(types::I8X16, canon_nan);
let result = pos.ins().raw_bitcast(types::I8X16, new_res);
let bitmask = pos.ins().bitselect(cond, canon_nan, result);
// Cast back to the original float vector type and reuse `val` as the result.
pos.ins().with_result(val).raw_bitcast(val_type, bitmask);
};
pos.prev_inst(); // Step backwards so the pass does not skip instructions.
match val_type {
types::F32 => {
let canon_nan = pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN));
scalar_select(pos, canon_nan);
}
types::F64 => {
let canon_nan = pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN));
scalar_select(pos, canon_nan);
}
types::F32X4 => {
let canon_nan = pos.ins().iconst(types::I32, i64::from(CANON_32BIT_NAN));
let canon_nan = pos.ins().splat(types::I32X4, canon_nan);
vector_select(pos, canon_nan);
}
types::F64X2 => {
let canon_nan = pos.ins().iconst(types::I64, CANON_64BIT_NAN as i64);
let canon_nan = pos.ins().splat(types::I64X2, canon_nan);
vector_select(pos, canon_nan);
}
/// Insert a canonical 32-bit or 64-bit NaN constant at the current position.
fn insert_nan_const(pos: &mut FuncCursor, nan_type: Type) -> Value {
match nan_type {
types::F32 => pos.ins().f32const(Ieee32::with_bits(CANON_32BIT_NAN)),
types::F64 => pos.ins().f64const(Ieee64::with_bits(CANON_64BIT_NAN)),
_ => {
// Panic if the type given was not an IEEE floating point type.
panic!("Could not canonicalize NaN: Unexpected result type found.");
}
}
pos.prev_inst(); // Step backwards so the pass does not skip instructions.
}

View File

@@ -40,6 +40,10 @@ fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()>
.strategy(strategy)?
.cranelift_debug_verifier(true);
if wast.ends_with("canonicalize-nan.wast") {
cfg.cranelift_nan_canonicalization(true);
}
// By default we'll allocate huge chunks (6gb) of the address space for each
// linear memory. This is typically fine but when we emulate tests with QEMU
// it turns out that it causes memory usage to balloon massively. Leave a

View File

@@ -0,0 +1,58 @@
;; This *.wast test should be run with `cranelift_nan_canonicalization` set to
;; `true` in `wast.rs`
;; Each export wraps exactly one SIMD floating-point unary instruction: it
;; takes a single v128 argument, applies the instruction named by the export,
;; and returns the resulting v128.
(module
  ;; f32x4 operations
  (func (export "f32x4.floor") (param $x v128) (result v128)
    (f32x4.floor (local.get $x)))
  (func (export "f32x4.nearest") (param $x v128) (result v128)
    (f32x4.nearest (local.get $x)))
  (func (export "f32x4.sqrt") (param $x v128) (result v128)
    (f32x4.sqrt (local.get $x)))
  (func (export "f32x4.trunc") (param $x v128) (result v128)
    (f32x4.trunc (local.get $x)))
  (func (export "f32x4.ceil") (param $x v128) (result v128)
    (f32x4.ceil (local.get $x)))

  ;; f64x2 operations
  (func (export "f64x2.floor") (param $x v128) (result v128)
    (f64x2.floor (local.get $x)))
  (func (export "f64x2.nearest") (param $x v128) (result v128)
    (f64x2.nearest (local.get $x)))
  (func (export "f64x2.sqrt") (param $x v128) (result v128)
    (f64x2.sqrt (local.get $x)))
  (func (export "f64x2.trunc") (param $x v128) (result v128)
    (f64x2.trunc (local.get $x)))
  (func (export "f64x2.ceil") (param $x v128) (result v128)
    (f64x2.ceil (local.get $x)))
)
;; Every invocation below passes at least one `nan` lane alongside ordinary
;; values and expects `nan` back in the same lane position, with the other
;; lanes holding the ordinary result of the operation.
;; NOTE(review): a bare `nan` in `v128.const` presumably denotes one specific
;; (canonical) NaN bit pattern rather than "any NaN" -- confirm against the
;; wast text format before relying on this.

;; f32x4 operations
(assert_return (invoke "f32x4.floor" (v128.const f32x4 1 -2.2 3.4 nan))
(v128.const f32x4 1 -3 3 nan))
(assert_return (invoke "f32x4.nearest" (v128.const f32x4 1 -2.2 3.4 nan))
(v128.const f32x4 1 -2 3 nan))
;; The -1 lane is also expected to come back as `nan` here, so this case
;; covers a NaN produced by the operation itself, not just one passed through.
(assert_return (invoke "f32x4.sqrt" (v128.const f32x4 1 4 -1 nan))
(v128.const f32x4 1 2 nan nan))
(assert_return (invoke "f32x4.trunc" (v128.const f32x4 1 -2.2 3.4 nan))
(v128.const f32x4 1 -2 3 nan))
(assert_return (invoke "f32x4.ceil" (v128.const f32x4 1 -2.2 3.4 nan))
(v128.const f32x4 1 -2 4 nan))

;; f64x2 operations
(assert_return (invoke "f64x2.floor" (v128.const f64x2 -2.2 nan))
(v128.const f64x2 -3 nan))
(assert_return (invoke "f64x2.nearest" (v128.const f64x2 -2.2 nan))
(v128.const f64x2 -2 nan))
(assert_return (invoke "f64x2.sqrt" (v128.const f64x2 4 nan))
(v128.const f64x2 2 nan))
(assert_return (invoke "f64x2.trunc" (v128.const f64x2 3.4 nan))
(v128.const f64x2 3 nan))
(assert_return (invoke "f64x2.ceil" (v128.const f64x2 3.4 nan))
(v128.const f64x2 4 nan))