cranelift: Remove iconst.i128 (#5075)

* cranelift: Remove iconst.i128

* bugpoint: Report Changed when only one instruction is mutated

* cranelift: Fix egraph bxor rule

* cranelift: Remove some simple_preopt opts for i128
This commit is contained in:
Afonso Bordado
2022-10-24 20:43:28 +01:00
committed by GitHub
parent bfcf6616fe
commit c8791073d6
13 changed files with 246 additions and 184 deletions

View File

@@ -694,6 +694,16 @@ pub(crate) fn define(
.build(),
);
let NarrowInt = &TypeVar::new(
"NarrowInt",
"An integer type with lanes type to `i64`",
TypeSetBuilder::new()
.ints(8..64)
.simd_lanes(Interval::All)
.dynamic_simd_lanes(Interval::All)
.build(),
);
let ScalarTruthy = &TypeVar::new(
"ScalarTruthy",
"A scalar truthy type",
@@ -1342,7 +1352,7 @@ pub(crate) fn define(
);
let N = &Operand::new("N", &imm.imm64);
let a = &Operand::new("a", Int).with_doc("A constant integer scalar or vector value");
let a = &Operand::new("a", NarrowInt).with_doc("A constant integer scalar or vector value");
ig.push(
Inst::new(
@@ -3880,15 +3890,6 @@ pub(crate) fn define(
.operands_out(vec![lo, hi]),
);
let NarrowInt = &TypeVar::new(
"NarrowInt",
"An integer type with lanes type to `i64`",
TypeSetBuilder::new()
.ints(8..64)
.simd_lanes(Interval::All)
.build(),
);
let lo = &Operand::new("lo", NarrowInt);
let hi = &Operand::new("hi", NarrowInt);
let a = &Operand::new("a", &NarrowInt.double_width())

View File

@@ -107,7 +107,7 @@
(subsume x))
;; x ^ x == 0.
(rule (simplify (bxor ty x x))
(rule (simplify (bxor (fits_in_64 ty) x x))
(subsume (iconst ty (imm64 0))))
;; x ^ not(x) == not(x) ^ x == -1.

View File

@@ -11,7 +11,7 @@ use crate::flowgraph::ControlFlowGraph;
use crate::ir::{
condcodes::{CondCode, IntCC},
instructions::Opcode,
types::{I32, I64},
types::{I128, I32, I64},
Block, DataFlowGraph, Function, Inst, InstBuilder, InstructionData, Type, Value,
};
use crate::isa::TargetIsa;
@@ -824,27 +824,27 @@ mod simplify {
};
// Replace operations that are no-ops.
match (opcode, imm.into()) {
(Opcode::IaddImm, 0)
| (Opcode::ImulImm, 1)
| (Opcode::SdivImm, 1)
| (Opcode::UdivImm, 1)
| (Opcode::BorImm, 0)
| (Opcode::BandImm, -1)
| (Opcode::BxorImm, 0)
| (Opcode::RotlImm, 0)
| (Opcode::RotrImm, 0)
| (Opcode::IshlImm, 0)
| (Opcode::UshrImm, 0)
| (Opcode::SshrImm, 0) => {
match (opcode, imm.into(), ty) {
(Opcode::IaddImm, 0, _)
| (Opcode::ImulImm, 1, _)
| (Opcode::SdivImm, 1, _)
| (Opcode::UdivImm, 1, _)
| (Opcode::BorImm, 0, _)
| (Opcode::BandImm, -1, _)
| (Opcode::BxorImm, 0, _)
| (Opcode::RotlImm, 0, _)
| (Opcode::RotrImm, 0, _)
| (Opcode::IshlImm, 0, _)
| (Opcode::UshrImm, 0, _)
| (Opcode::SshrImm, 0, _) => {
// Alias the result value with the original argument.
replace_single_result_with_alias(&mut pos.func.dfg, inst, arg);
}
(Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => {
(Opcode::ImulImm, 0, ty) | (Opcode::BandImm, 0, ty) if ty != I128 => {
// Replace by zero.
pos.func.dfg.replace(inst).iconst(ty, 0);
}
(Opcode::BorImm, -1) => {
(Opcode::BorImm, -1, ty) if ty != I128 => {
// Replace by minus one.
pos.func.dfg.replace(inst).iconst(ty, -1);
}

View File

@@ -0,0 +1,13 @@
test optimize
set opt_level=speed_and_size
set use_egraphs=true
target x86_64
; This is a regression test to ensure that we don't insert an iconst.i128 when optimizing bxor.
function %bxor_i128(i128) -> i128 system_v {
block0(v0: i128):
v1 = bxor v0, v0
return v1
; check: v1 = bxor v0, v0
; nextln: return v1
}

View File

@@ -278,81 +278,86 @@ block0(v0: i64, v1: i128, v2: i128):
function %f(i128, i8, i8) -> i8 {
block0(v0: i128, v1: i8, v2: i8):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif.i8 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif.i8 eq v5, v1, v2
return v6
}
; block0:
; movz x6, #42
; movz x8, #0
; subs xzr, x0, x6
; ccmp x1, x8, #nzcv, eq
; movz x7, #42
; movz w9, #0
; subs xzr, x0, x7
; ccmp x1, x9, #nzcv, eq
; csel x0, x2, x3, eq
; ret
function %f(i128, i16, i16) -> i16 {
block0(v0: i128, v1: i16, v2: i16):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif.i16 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif.i16 eq v5, v1, v2
return v6
}
; block0:
; movz x6, #42
; movz x8, #0
; subs xzr, x0, x6
; ccmp x1, x8, #nzcv, eq
; movz x7, #42
; movz w9, #0
; subs xzr, x0, x7
; ccmp x1, x9, #nzcv, eq
; csel x0, x2, x3, eq
; ret
function %f(i128, i32, i32) -> i32 {
block0(v0: i128, v1: i32, v2: i32):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif.i32 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif.i32 eq v5, v1, v2
return v6
}
; block0:
; movz x6, #42
; movz x8, #0
; subs xzr, x0, x6
; ccmp x1, x8, #nzcv, eq
; movz x7, #42
; movz w9, #0
; subs xzr, x0, x7
; ccmp x1, x9, #nzcv, eq
; csel x0, x2, x3, eq
; ret
function %f(i128, i64, i64) -> i64 {
block0(v0: i128, v1: i64, v2: i64):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif.i64 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif.i64 eq v5, v1, v2
return v6
}
; block0:
; movz x6, #42
; movz x8, #0
; subs xzr, x0, x6
; ccmp x1, x8, #nzcv, eq
; movz x7, #42
; movz w9, #0
; subs xzr, x0, x7
; ccmp x1, x9, #nzcv, eq
; csel x0, x2, x3, eq
; ret
function %f(i128, i128, i128) -> i128 {
block0(v0: i128, v1: i128, v2: i128):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif.i128 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif.i128 eq v5, v1, v2
return v6
}
; block0:
; movz x9, #42
; movz x11, #0
; subs xzr, x0, x9
; ccmp x1, x11, #nzcv, eq
; movz x11, #42
; movz w13, #0
; subs xzr, x0, x11
; ccmp x1, x13, #nzcv, eq
; csel x0, x2, x4, eq
; csel x1, x3, x5, eq
; ret
@@ -653,85 +658,90 @@ block0(v0: i64, v1: i128, v2: i128):
function %f(i128, i8, i8) -> i8 {
block0(v0: i128, v1: i8, v2: i8):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif_spectre_guard.i8 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif_spectre_guard.i8 eq v5, v1, v2
return v6
}
; block0:
; movz x6, #42
; movz x8, #0
; subs xzr, x0, x6
; ccmp x1, x8, #nzcv, eq
; movz x7, #42
; movz w9, #0
; subs xzr, x0, x7
; ccmp x1, x9, #nzcv, eq
; csel x0, x2, x3, eq
; csdb
; ret
function %f(i128, i16, i16) -> i16 {
block0(v0: i128, v1: i16, v2: i16):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif_spectre_guard.i16 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif_spectre_guard.i16 eq v5, v1, v2
return v6
}
; block0:
; movz x6, #42
; movz x8, #0
; subs xzr, x0, x6
; ccmp x1, x8, #nzcv, eq
; movz x7, #42
; movz w9, #0
; subs xzr, x0, x7
; ccmp x1, x9, #nzcv, eq
; csel x0, x2, x3, eq
; csdb
; ret
function %f(i128, i32, i32) -> i32 {
block0(v0: i128, v1: i32, v2: i32):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif_spectre_guard.i32 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif_spectre_guard.i32 eq v5, v1, v2
return v6
}
; block0:
; movz x6, #42
; movz x8, #0
; subs xzr, x0, x6
; ccmp x1, x8, #nzcv, eq
; movz x7, #42
; movz w9, #0
; subs xzr, x0, x7
; ccmp x1, x9, #nzcv, eq
; csel x0, x2, x3, eq
; csdb
; ret
function %f(i128, i64, i64) -> i64 {
block0(v0: i128, v1: i64, v2: i64):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif_spectre_guard.i64 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif_spectre_guard.i64 eq v5, v1, v2
return v6
}
; block0:
; movz x6, #42
; movz x8, #0
; subs xzr, x0, x6
; ccmp x1, x8, #nzcv, eq
; movz x7, #42
; movz w9, #0
; subs xzr, x0, x7
; ccmp x1, x9, #nzcv, eq
; csel x0, x2, x3, eq
; csdb
; ret
function %f(i128, i128, i128) -> i128 {
block0(v0: i128, v1: i128, v2: i128):
v3 = iconst.i128 42
v4 = ifcmp v0, v3
v5 = selectif_spectre_guard.i128 eq v4, v1, v2
return v5
v3 = iconst.i64 42
v4 = uextend.i128 v3
v5 = ifcmp v0, v4
v6 = selectif_spectre_guard.i128 eq v5, v1, v2
return v6
}
; block0:
; movz x9, #42
; movz x11, #0
; subs xzr, x0, x9
; ccmp x1, x11, #nzcv, eq
; movz x11, #42
; movz w13, #0
; subs xzr, x0, x11
; ccmp x1, x13, #nzcv, eq
; csel x0, x2, x4, eq
; csel x1, x3, x5, eq
; csdb

View File

@@ -656,43 +656,46 @@ block0(v0: i64):
function %f23(i128, i8) -> i128 {
block0(v0: i128, v1: i8):
v2 = iconst.i128 0
brnz v1, block1(v2)
jump block2(v2)
v2 = iconst.i64 0
v3 = uextend.i128 v2
brnz v1, block1(v3)
jump block2(v3)
block1(v3: i128):
v4 = iconst.i128 1
v5 = iadd.i128 v3, v4
return v5
block1(v4: i128):
v5 = iconst.i64 1
v6 = uextend.i128 v5
v7 = iadd.i128 v4, v6
return v7
block2(v6: i128):
v7 = iconst.i128 2
v8 = iadd.i128 v6, v7
return v8
block2(v8: i128):
v9 = iconst.i64 2
v10 = uextend.i128 v9
v11 = iadd.i128 v8, v10
return v11
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; xorq %rax, %rax, %rax
; xorq %r11, %r11, %r11
; testb %dl, %dl
; jnz label1; j label2
; block1:
; xorq %rax, %rax, %rax
; xorq %rdx, %rdx, %rdx
; movl $1, %esi
; xorq %rcx, %rcx, %rcx
; addq %rax, %rsi, %rax
; adcq %rdx, %rcx, %rdx
; movl $1, %r10d
; xorq %rsi, %rsi, %rsi
; addq %rax, %r10, %rax
; movq %r11, %rdx
; adcq %rdx, %rsi, %rdx
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; xorq %rax, %rax, %rax
; xorq %rdx, %rdx, %rdx
; movl $2, %r8d
; xorq %r10, %r10, %r10
; addq %rax, %r8, %rax
; adcq %rdx, %r10, %rdx
; movq %r11, %rdx
; movl $2, %ecx
; xorq %r8, %r8, %r8
; addq %rax, %rcx, %rax
; adcq %rdx, %r8, %rdx
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -1,14 +0,0 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target aarch64
target s390x
target x86_64
target riscv64
function %i128_const_0() -> i128 {
block0:
v1 = iconst.i128 0
return v1
}
; run: %i128_const_0() == 0

View File

@@ -15,8 +15,10 @@ function %issue4996() -> i128, i64 system_v {
jump block5(v13)
block3:
v21 = iconst.i128 0
jump block5(v21) ; v21 = 0
v20 = iconst.i64 0
v21 = uextend.i128 v20 ; v20 = 0
jump block5(v21)
block5(v23: i128):
v29 = iconst.i64 0
return v23, v29 ; v29 = 0

View File

@@ -0,0 +1,28 @@
test simple_preopt
target aarch64
target x86_64
target s390x
target riscv64
function %imul_imm_zero(i128) -> i128 {
block0(v0: i128):
v1 = imul_imm v0, 0
return v1
}
; sameln: function %imul_imm_zero
; nextln: block0(v0: i128):
; nextln: v1 = imul_imm v0, 0
; nextln: return v1
; nextln: }
function %band_imm_zero(i128) -> i128 {
block0(v0: i128):
v1 = band_imm v0, 0
return v1
}
; check: function %band_imm_zero
; nextln: block0(v0: i128):
; nextln: v1 = band_imm v0, 0
; nextln: return v1
; nextln: }

View File

@@ -644,7 +644,8 @@ block5:
let mut bx = FunctionBuilder::new(&mut func, &mut func_ctx);
let block0 = bx.create_block();
bx.switch_to_block(block0);
let val = bx.ins().iconst(types::I128, 0);
let val = bx.ins().iconst(types::I64, 0);
let val = bx.ins().uextend(types::I128, val);
let mut switch = Switch::new();
let block1 = bx.create_block();
switch.set_entry(1, block1);
@@ -663,17 +664,18 @@ block5:
" jt0 = jump_table [block2, block1]
block0:
v0 = iconst.i128 0
v0 = iconst.i64 0
v1 = uextend.i128 v0 ; v0 = 0
jump block4
block4:
v1 = icmp_imm.i128 ugt v0, 0xffff_ffff ; v0 = 0
brnz v1, block3
v2 = icmp_imm.i128 ugt v1, 0xffff_ffff
brnz v2, block3
jump block5
block5:
v2 = ireduce.i32 v0 ; v0 = 0
br_table v2, block3, jt0"
v3 = ireduce.i32 v1
br_table v3, block3, jt0"
);
}
}

View File

@@ -781,7 +781,6 @@ const OPCODE_SIGNATURES: &'static [(
(Opcode::Iconst, &[], &[I16], insert_const),
(Opcode::Iconst, &[], &[I32], insert_const),
(Opcode::Iconst, &[], &[I64], insert_const),
(Opcode::Iconst, &[], &[I128], insert_const),
// Float Consts
(Opcode::F32const, &[], &[F32], insert_const),
(Opcode::F64const, &[], &[F64], insert_const),
@@ -1200,11 +1199,11 @@ where
/// Zero initializes the stack slot by inserting `stack_store`'s.
fn initialize_stack_slots(&mut self, builder: &mut FunctionBuilder) -> Result<()> {
let i128_zero = builder.ins().iconst(I128, 0);
let i64_zero = builder.ins().iconst(I64, 0);
let i32_zero = builder.ins().iconst(I32, 0);
let i16_zero = builder.ins().iconst(I16, 0);
let i8_zero = builder.ins().iconst(I8, 0);
let i16_zero = builder.ins().iconst(I16, 0);
let i32_zero = builder.ins().iconst(I32, 0);
let i64_zero = builder.ins().iconst(I64, 0);
let i128_zero = builder.ins().uextend(I128, i64_zero);
for &(slot, init_size) in self.resources.stack_slots.iter() {
let mut size = init_size;

View File

@@ -3,9 +3,10 @@
use crate::utils::{parse_sets_and_triple, read_to_string};
use anyhow::{Context as _, Result};
use clap::Parser;
use cranelift::prelude::Value;
use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::flowgraph::ControlFlowGraph;
use cranelift_codegen::ir::types::{F32, F64};
use cranelift_codegen::ir::types::{F32, F64, I128, I64};
use cranelift_codegen::ir::{
self, Block, FuncRef, Function, GlobalValueData, Inst, InstBuilder, InstructionData,
StackSlots, TrapCode,
@@ -182,14 +183,22 @@ impl Mutator for ReplaceInstWithConst {
return (func, format!(""), ProgressStatus::Skip);
}
if num_results == 1 {
let ty = func.dfg.value_type(func.dfg.first_result(prev_inst));
let new_inst_name = const_for_type(func.dfg.replace(prev_inst), ty);
return (
func,
format!("Replace inst {} with {}.", prev_inst, new_inst_name),
ProgressStatus::Changed,
);
// We replace an i128 const with a uextend+iconst, so we need to match that here
// to avoid processing those multiple times
if opcode == ir::Opcode::Uextend {
let ret_ty = func.dfg.value_type(func.dfg.first_result(prev_inst));
let is_uextend_i128 = ret_ty == I128;
let arg = func.dfg.inst_args(prev_inst)[0];
let arg_def = func.dfg.value_def(arg);
let arg_is_iconst = arg_def
.inst()
.map(|inst| func.dfg[inst].opcode() == ir::Opcode::Iconst)
.unwrap_or(false);
if is_uextend_i128 && arg_is_iconst {
return (func, format!(""), ProgressStatus::Skip);
}
}
// At least 2 results. Replace each instruction with as many const instructions as
@@ -204,20 +213,24 @@ impl Mutator for ReplaceInstWithConst {
pos.func.dfg.clear_results(prev_inst);
let mut inst_names = Vec::new();
for r in results {
let ty = pos.func.dfg.value_type(r);
let builder = pos.ins().with_results([Some(r)]);
let new_inst_name = const_for_type(builder, ty);
for r in &results {
let new_inst_name = replace_with_const(&mut pos, *r);
inst_names.push(new_inst_name);
}
// Remove the instruction.
assert_eq!(pos.remove_inst(), prev_inst);
let progress = if results.len() == 1 {
ProgressStatus::Changed
} else {
ProgressStatus::ExpandedOrShrinked
};
(
func,
format!("Replace inst {} with {}", prev_inst, inst_names.join(" / ")),
ProgressStatus::ExpandedOrShrinked,
progress,
)
},
)
@@ -397,13 +410,11 @@ impl Mutator for ReplaceBlockParamWithConst {
let param_index = self.params_remaining;
let param = func.dfg.block_params(self.block)[param_index];
let param_type = func.dfg.value_type(param);
func.dfg.remove_block_param(param);
let first_inst = func.layout.first_inst(self.block).unwrap();
let mut pos = FuncCursor::new(&mut func).at_inst(first_inst);
let builder = pos.ins().with_results([Some(param)]);
let new_inst_name = const_for_type(builder, param_type);
let new_inst_name = replace_with_const(&mut pos, param);
let mut cfg = ControlFlowGraph::new();
cfg.compute(&func);
@@ -755,24 +766,29 @@ impl Mutator for MergeBlocks {
}
}
fn const_for_type<'f, T: InstBuilder<'f>>(mut builder: T, ty: ir::Type) -> &'static str {
fn replace_with_const(pos: &mut FuncCursor, param: Value) -> &'static str {
let ty = pos.func.dfg.value_type(param);
if ty == F32 {
builder.f32const(0.0);
pos.ins().with_result(param).f32const(0.0);
"f32const"
} else if ty == F64 {
builder.f64const(0.0);
pos.ins().with_result(param).f64const(0.0);
"f64const"
} else if ty.is_ref() {
builder.null(ty);
pos.ins().with_result(param).null(ty);
"null"
} else if ty.is_vector() {
let zero_data = vec![0; ty.bytes() as usize].into();
let zero_handle = builder.data_flow_graph_mut().constants.insert(zero_data);
builder.vconst(ty, zero_handle);
let zero_handle = pos.func.dfg.constants.insert(zero_data);
pos.ins().with_result(param).vconst(ty, zero_handle);
"vconst"
} else if ty == I128 {
let res = pos.ins().iconst(I64, 0);
pos.ins().with_result(param).uextend(I128, res);
"iconst+uextend"
} else {
// Default to an integer type and possibly create verifier error
builder.iconst(ty, 0);
pos.ins().with_result(param).iconst(ty, 0);
"iconst"
}
}

View File

@@ -12,15 +12,17 @@ block1:
v3 = iconst.i16 0
v4 = iconst.i32 0
v5 = iconst.i64 0
v6 = iconst.i128 0
v16 = iconst.i64 0
v6 = uextend.i128 v16 ; v16 = 0
v7 = iconst.i8 0
v8 = iconst.i8 0
v9 = iconst.i128 0
v15 = iconst.i64 0
v9 = uextend.i128 v15 ; v15 = 0
v10 = null.r32
v11 = null.r64
v12 = vconst.i8x16 const2
v13 = vconst.i16x4 const1
v14 = vconst.f32x16 const0
call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) ; v0 = 0.0, v1 = 0.0, v2 = 0, v3 = 0, v4 = 0, v5 = 0, v6 = 0, v7 = 0, v8 = 0, v9 = 0, v12 = const2, v13 = const1, v14 = const0
call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) ; v0 = 0.0, v1 = 0.0, v2 = 0, v3 = 0, v4 = 0, v5 = 0, v7 = 0, v8 = 0, v12 = const2, v13 = const1, v14 = const0
trap user0
}