x64: port the remainder of select to ISLE (#3973)
Previous changes had ported the difficult "`select` based on an `fcmp`" patterns to ISLE; this completes the porting of `select` by moving over the final two kinds of patterns: (1) `select` based on an `icmp`, and (2) `select` based on a value.
This commit is contained in:
@@ -1746,6 +1746,43 @@
|
||||
(rule (lower (has_type ty (select (fcmp (FloatCC.NotEqual) a b) x y)))
|
||||
(with_flags (x64_ucomis a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y)))
|
||||
|
||||
;; We also can lower `select`s that depend on an `icmp` test, but more simply
|
||||
;; than the `fcmp` variants above. In these cases, we lower to a `CMP`
|
||||
;; instruction plus a `CMOV`; recall that `cmove_from_values` here may emit more
|
||||
;; than one instruction for certain types (e.g., XMM-held, I128).
|
||||
|
||||
(rule (lower (has_type ty (select (icmp cc a @ (value_type (fits_in_64 a_ty)) b) x y)))
|
||||
;; N.B.: we force the comparison operands into registers, and disallow
|
||||
;; load-op fusion, because we do not have a transitive guarantee that this
|
||||
;; cmp-site will be the sole user of the value. Consider: the `icmp` might
|
||||
;; be the only user of a load, but there may be multiple users of the
|
||||
;; `icmp` (e.g., `select` or `bint` instructions) that each emit a
|
||||
;; comparison. If we were to allow a load to sink to the *latest* one, but
|
||||
;; other sites did not permit sinking, then we would be missing the load
|
||||
;; for other cmp-sites. TODO:
|
||||
;; https://github.com/bytecodealliance/wasmtime/issues/3953.
|
||||
(let ((gpr_a Gpr (put_in_gpr a))
|
||||
(gpr_b Gpr (put_in_gpr b))
|
||||
;; The compare is sized by the type of the `icmp` operands (`a_ty`), not by
;; the type `ty` of the values being selected.
(size OperandSize (raw_operand_size_of_type a_ty)))
|
||||
;; NOTE(review): `gpr_b` is passed before `gpr_a`; presumably `x64_cmp` takes
;; its operands in (source, destination) order so that `cc` is evaluated for
;; `a` versus `b` -- confirm against the helper's definition.
(with_flags (x64_cmp size gpr_b gpr_a) (cmove_from_values ty cc x y))))
|
||||
|
||||
;; Finally, we lower `select` from a condition value `c`. These rules are meant
|
||||
;; to be the final, default lowerings if no other patterns matched above.
|
||||
|
||||
(rule (lower (has_type ty (select c @ (value_type $B1) x y)))
|
||||
;; The condition `c` is a `$B1`: emit a `test` of `c` against the immediate 1
;; and key the conditional move between `x` and `y` on `CC.NZ`.
(let ((size OperandSize (raw_operand_size_of_type $B1))
|
||||
;; N.B.: disallow load-op fusion, see above. TODO:
|
||||
;; https://github.com/bytecodealliance/wasmtime/issues/3953.
|
||||
(gpr_c Gpr (put_in_gpr c)))
|
||||
(with_flags (x64_test size (RegMemImm.Imm 1) gpr_c) (cmove_from_values ty (CC.NZ) x y))))
|
||||
|
||||
(rule (lower (has_type ty (select c @ (value_type (fits_in_64 a_ty)) x y)))
|
||||
;; The condition `c` has a type that fits in 64 bits: emit a `test` of `c`
;; against itself, sized by `c`'s own type (`a_ty`), and key the conditional
;; move between `x` and `y` on `CC.NZ`.
(let ((size OperandSize (raw_operand_size_of_type a_ty))
|
||||
;; N.B.: disallow load-op fusion, see above. TODO:
|
||||
;; https://github.com/bytecodealliance/wasmtime/issues/3953.
|
||||
(gpr_c Gpr (put_in_gpr c)))
|
||||
(with_flags (x64_test size gpr_c gpr_c) (cmove_from_values ty (CC.NZ) x y))))
|
||||
|
||||
;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; If available, we can use a plain lzcnt instruction here. Note no
|
||||
|
||||
@@ -2569,67 +2569,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::Select => {
|
||||
let flag_input = inputs[0];
|
||||
if let Some(_) = matches_input(ctx, flag_input, Opcode::Fcmp) {
|
||||
implemented_in_isle(ctx);
|
||||
} else {
|
||||
let ty = ty.unwrap();
|
||||
|
||||
let size = ty.bytes() as u8;
|
||||
let lhs = put_input_in_regs(ctx, inputs[1]);
|
||||
let rhs = put_input_in_regs(ctx, inputs[2]);
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
|
||||
let cc = if let Some(icmp) = matches_input(ctx, flag_input, Opcode::Icmp) {
|
||||
let cond_code = ctx.data(icmp).cond_code().unwrap();
|
||||
let cond_code = emit_cmp(ctx, icmp, cond_code);
|
||||
CC::from_intcc(cond_code)
|
||||
} else {
|
||||
let sel_ty = ctx.input_ty(insn, 0);
|
||||
let size = OperandSize::from_ty(ctx.input_ty(insn, 0));
|
||||
let test = put_input_in_reg(ctx, flag_input);
|
||||
let test_input = if sel_ty == types::B1 {
|
||||
// The input is a boolean value; test the LSB for nonzero with:
|
||||
// test reg, 1
|
||||
RegMemImm::imm(1)
|
||||
} else {
|
||||
// The input is an integer; test the whole value for
|
||||
// nonzero with:
|
||||
// test reg, reg
|
||||
//
|
||||
// (It doesn't make sense to have a boolean wider than
|
||||
// one bit here -- which bit would cause us to select an
|
||||
// input?)
|
||||
assert!(!is_bool_ty(sel_ty));
|
||||
RegMemImm::reg(test)
|
||||
};
|
||||
ctx.emit(Inst::test_rmi_r(size, test_input, test));
|
||||
CC::NZ
|
||||
};
|
||||
|
||||
// This doesn't affect the flags.
|
||||
emit_moves(ctx, dst, rhs, ty);
|
||||
|
||||
if is_int_or_ref_ty(ty) || ty == types::I128 {
|
||||
emit_cmoves(ctx, size, cc, lhs, dst);
|
||||
} else {
|
||||
debug_assert!(
|
||||
ty == types::F32
|
||||
|| ty == types::F64
|
||||
|| (ty.is_vector() && ty.bits() == 128)
|
||||
);
|
||||
ctx.emit(Inst::xmm_cmove(
|
||||
if ty == types::F64 {
|
||||
OperandSize::Size64
|
||||
} else {
|
||||
OperandSize::Size32
|
||||
},
|
||||
cc,
|
||||
RegMem::reg(lhs.only_reg().unwrap()),
|
||||
dst.only_reg().unwrap(),
|
||||
));
|
||||
}
|
||||
}
|
||||
implemented_in_isle(ctx);
|
||||
}
|
||||
|
||||
Opcode::Selectif | Opcode::SelectifSpectreGuard => {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
src/clif.isle 9ea75a6f790b5c03
|
||||
src/prelude.isle 74d9514ac948e163
|
||||
src/isa/x64/inst.isle a002d62dcfce285
|
||||
src/isa/x64/lower.isle d8facef52a4e2ac6
|
||||
src/isa/x64/lower.isle 8f3e1ed2929fd07e
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -11,15 +11,16 @@ block0(v0: b1, v1: i32, v2: i32):
|
||||
; Entry block: 0
|
||||
; Block 0:
|
||||
; (original IR block: block0)
|
||||
; (instruction range: 0 .. 8)
|
||||
; (instruction range: 0 .. 9)
|
||||
; Inst 0: pushq %rbp
|
||||
; Inst 1: movq %rsp, %rbp
|
||||
; Inst 2: testb $1, %dil
|
||||
; Inst 3: cmovnzl %esi, %edx
|
||||
; Inst 4: movq %rdx, %rax
|
||||
; Inst 5: movq %rbp, %rsp
|
||||
; Inst 6: popq %rbp
|
||||
; Inst 7: ret
|
||||
; Inst 3: movl %edx, %edi
|
||||
; Inst 4: cmovnzl %esi, %edi
|
||||
; Inst 5: movq %rdi, %rax
|
||||
; Inst 6: movq %rbp, %rsp
|
||||
; Inst 7: popq %rbp
|
||||
; Inst 8: ret
|
||||
; }}
|
||||
|
||||
function %f1(b1) -> i32 {
|
||||
|
||||
Reference in New Issue
Block a user