x64: port the remainder of select to ISLE (#3973)

Previous changes had ported the difficult "`select` based on an `fcmp`"
patterns to ISLE; this completes porting of `select` by moving over the
final two kinds of patterns:
 - `select` based on an `icmp`
 - `select` based on a value
This commit is contained in:
Andrew Brown
2022-03-30 13:32:26 -07:00
committed by GitHub
parent a5752773b2
commit e8dd13cf87
5 changed files with 408 additions and 332 deletions

View File

@@ -1746,6 +1746,43 @@
(rule (lower (has_type ty (select (fcmp (FloatCC.NotEqual) a b) x y)))
(with_flags (x64_ucomis a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y)))
;; We also can lower `select`s that depend on an `icmp` test, but more simply
;; than the `fcmp` variants above. In these cases, we lower to a `CMP`
;; instruction plus a `CMOV`; recall that `cmove_from_values` here may emit more
;; than one instruction for certain types (e.g., XMM-held, I128).
(rule (lower (has_type ty (select (icmp cc a @ (value_type (fits_in_64 a_ty)) b) x y)))
;; N.B.: we force the comparison operands into registers, and disallow
;; load-op fusion, because we do not have a transitive guarantee that this
;; cmp-site will be the sole user of the value. Consider: the `icmp` might
;; be the only user of a load, but there may be multiple users of the
;; `icmp` (e.g., `select` or `bint` instructions) that each emit their own
;; comparison. If we were to allow a load to sink to the *latest* one, but
;; other sites did not permit sinking, then we would be missing the load
;; for other cmp-sites. TODO:
;; https://github.com/bytecodealliance/wasmtime/issues/3953.
(let ((gpr_a Gpr (put_in_gpr a))
(gpr_b Gpr (put_in_gpr b))
(size OperandSize (raw_operand_size_of_type a_ty)))
;; The comparison width comes from the type of the `icmp` operands
;; (`a_ty`), not from the type `ty` of the selected values. Note that `b`
;; is passed before `a` to `x64_cmp`.
;; NOTE(review): presumably `x64_cmp` takes its operands in AT&T order so
;; that the resulting flags describe `a <cc> b` -- confirm against the
;; helper's definition in inst.isle.
(with_flags (x64_cmp size gpr_b gpr_a) (cmove_from_values ty cc x y))))
;; Finally, we lower `select` from a condition value `c`. These rules are meant
;; to be the final, default lowerings if no other patterns matched above.
;; `select` on a `$B1` condition: only the least-significant bit of `c` is
;; tested (`test c, 1`), and the conditional move is performed on `CC.NZ`,
;; i.e., when that bit is set.
(rule (lower (has_type ty (select c @ (value_type $B1) x y)))
(let ((size OperandSize (raw_operand_size_of_type $B1))
;; N.B.: disallow load-op fusion, see above. TODO:
;; https://github.com/bytecodealliance/wasmtime/issues/3953.
(gpr_c Gpr (put_in_gpr c)))
(with_flags (x64_test size (RegMemImm.Imm 1) gpr_c) (cmove_from_values ty (CC.NZ) x y))))
;; `select` on a condition value `c` of any type that fits in 64 bits: the whole
;; value is tested against itself (`test c, c`), and the conditional move is
;; performed on `CC.NZ`, i.e., when `c` is nonzero. Here `a_ty` names the type
;; of `c` and determines the width of the test.
(rule (lower (has_type ty (select c @ (value_type (fits_in_64 a_ty)) x y)))
(let ((size OperandSize (raw_operand_size_of_type a_ty))
;; N.B.: disallow load-op fusion, see above. TODO:
;; https://github.com/bytecodealliance/wasmtime/issues/3953.
(gpr_c Gpr (put_in_gpr c)))
(with_flags (x64_test size gpr_c gpr_c) (cmove_from_values ty (CC.NZ) x y))))
;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; If available, we can use a plain lzcnt instruction here. Note no

View File

@@ -2569,67 +2569,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
Opcode::Select => {
let flag_input = inputs[0];
if let Some(_) = matches_input(ctx, flag_input, Opcode::Fcmp) {
implemented_in_isle(ctx);
} else {
let ty = ty.unwrap();
let size = ty.bytes() as u8;
let lhs = put_input_in_regs(ctx, inputs[1]);
let rhs = put_input_in_regs(ctx, inputs[2]);
let dst = get_output_reg(ctx, outputs[0]);
let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
let cond_code = ctx.data(icmp).cond_code().unwrap();
let cond_code = emit_cmp(ctx, icmp, cond_code);
CC::from_intcc(cond_code)
} else {
let sel_ty = ctx.input_ty(insn, 0);
let size = OperandSize::from_ty(ctx.input_ty(insn, 0));
let test = put_input_in_reg(ctx, flag_input);
let test_input = if sel_ty == types::B1 {
// The input is a boolean value; test the LSB for nonzero with:
// test reg, 1
RegMemImm::imm(1)
} else {
// The input is an integer; test the whole value for
// nonzero with:
// test reg, reg
//
// (It doesn't make sense to have a boolean wider than
// one bit here -- which bit would cause us to select an
// input?)
assert!(!is_bool_ty(sel_ty));
RegMemImm::reg(test)
};
ctx.emit(Inst::test_rmi_r(size, test_input, test));
CC::NZ
};
// This doesn't affect the flags.
emit_moves(ctx, dst, rhs, ty);
if is_int_or_ref_ty(ty) || ty == types::I128 {
emit_cmoves(ctx, size, cc, lhs, dst);
} else {
debug_assert!(
ty == types::F32
|| ty == types::F64
|| (ty.is_vector() && ty.bits() == 128)
);
ctx.emit(Inst::xmm_cmove(
if ty == types::F64 {
OperandSize::Size64
} else {
OperandSize::Size32
},
cc,
RegMem::reg(lhs.only_reg().unwrap()),
dst.only_reg().unwrap(),
));
}
}
implemented_in_isle(ctx);
}
Opcode::Selectif | Opcode::SelectifSpectreGuard => {

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 74d9514ac948e163
src/isa/x64/inst.isle a002d62dcfce285
src/isa/x64/lower.isle d8facef52a4e2ac6
src/isa/x64/lower.isle 8f3e1ed2929fd07e

File diff suppressed because it is too large Load Diff

View File

@@ -11,15 +11,16 @@ block0(v0: b1, v1: i32, v2: i32):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 8)
; (instruction range: 0 .. 9)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: testb $1, %dil
; Inst 3: cmovnzl %esi, %edx
; Inst 4: movq %rdx, %rax
; Inst 5: movq %rbp, %rsp
; Inst 6: popq %rbp
; Inst 7: ret
; Inst 3: movl %edx, %edi
; Inst 4: cmovnzl %esi, %edi
; Inst 5: movq %rdi, %rax
; Inst 6: movq %rbp, %rsp
; Inst 7: popq %rbp
; Inst 8: ret
; }}
function %f1(b1) -> i32 {