x64 backend: migrate stores, and remainder of loads (I128 case), to ISLE. (#4069)
@@ -838,6 +838,11 @@
 (rule (to_amode flags base offset)
       (amode_imm_reg_flags offset (put_in_gpr base) flags))
 
+;; Offsetting an Amode. Used when we need to do consecutive
+;; loads/stores to adjacent addresses.
+(decl amode_offset (Amode u32) Amode)
+(extern constructor amode_offset amode_offset)
+
 ;; Shift kinds.
 
 (type ShiftKind extern
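The new `amode_offset` constructor takes an already-lowered address and bumps its constant displacement, so the I128 rules below can address the second 64-bit half as `addr + 8` without recomputing the base. A minimal standalone sketch of the idea, using a simplified stand-in for Cranelift's `Amode` type (the real one has more variants, e.g. base+index and RIP-relative):

```rust
/// Simplified stand-in for an x64 addressing mode: displacement + base register.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Amode {
    simm32: u32, // constant displacement
    base: u8,    // base register number
}

impl Amode {
    /// Offsetting an Amode: bump the displacement, keep the base register.
    fn offset(self, off: u32) -> Amode {
        Amode { simm32: self.simm32.wrapping_add(off), ..self }
    }
}

fn main() {
    let lo = Amode { simm32: 16, base: 5 };
    let hi = lo.offset(8); // address of the high 64-bit half
    assert_eq!(hi, Amode { simm32: 24, base: 5 });
}
```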
@@ -1404,6 +1409,15 @@
 (rule (x64_pmovzxdq from)
       (xmm_unary_rm_r (SseOpcode.Pmovzxdq) from))
 
+(decl x64_movrm (Type SyntheticAmode Gpr) SideEffectNoResult)
+(rule (x64_movrm ty addr data)
+      (let ((size OperandSize (raw_operand_size_of_type ty)))
+        (SideEffectNoResult.Inst (MInst.MovRM size data addr))))
+
+(decl x64_xmm_movrm (SseOpcode SyntheticAmode Xmm) SideEffectNoResult)
+(rule (x64_xmm_movrm op addr data)
+      (SideEffectNoResult.Inst (MInst.XmmMovRM op data addr)))
+
 ;; Load a constant into an XMM register.
 (decl x64_xmm_load_const (Type VCodeConstant) Xmm)
 (rule (x64_xmm_load_const ty const)
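`x64_movrm` wraps the GPR-to-memory `MInst.MovRM`, using `raw_operand_size_of_type` to pick the 8/16/32/64-bit form from the CLIF type; `x64_xmm_movrm` does the same for XMM stores, with the SSE opcode passed in explicitly. A rough model of the width selection (hypothetical helper; the real code returns an `OperandSize` enum rather than a byte count):

```rust
/// Rough model of raw_operand_size_of_type: map a type's bit width to the
/// number of bytes the GPR store writes, without rounding small types up.
fn store_width_bytes(bits: u32) -> u32 {
    match bits {
        1 | 8 => 1, // booleans and I8 are stored as one byte
        16 => 2,
        32 => 4,
        64 => 8,
        other => panic!("no single GPR store for {other}-bit types"),
    }
}
```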
@@ -2554,6 +2554,15 @@
 (rule (lower (has_type (ty_vec128 ty) (load flags address offset)))
       (x64_movdqu (to_amode flags address offset)))
 
+;; We can load an I128/B128 by doing two 64-bit loads.
+(rule (lower (has_type (ty_int_bool_128 _)
+                       (load flags address offset)))
+      (let ((addr_lo Amode (to_amode flags address offset))
+            (addr_hi Amode (amode_offset addr_lo 8))
+            (value_lo Reg (x64_mov addr_lo))
+            (value_hi Reg (x64_mov addr_hi)))
+        (value_regs value_lo value_hi)))
+
 ;; We also include widening vector loads; these sign- or zero-extend each lane
 ;; to the next wider width (e.g., 16x4 -> 32x4).
 (rule (lower (has_type $I16X8 (sload8x8 flags address offset)))
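Since there is no 128-bit GPR load, the rule splits the access into two 64-bit `mov`s: the low half at the original address and the high half 8 bytes up, matching x64's little-endian layout. The same decomposition, sketched over plain Rust memory rather than emitted instructions:

```rust
/// Reassemble a 128-bit value from two 64-bit loads at adjacent addresses,
/// mirroring the two `mov`s emitted by the rule above.
fn load_u128(mem: &[u8; 16]) -> u128 {
    let lo = u64::from_le_bytes(mem[0..8].try_into().unwrap()); // addr_lo
    let hi = u64::from_le_bytes(mem[8..16].try_into().unwrap()); // addr_hi = addr_lo + 8
    ((hi as u128) << 64) | (lo as u128)
}
```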
@@ -2570,3 +2579,79 @@
       (x64_pmovzxdq (to_amode flags address offset)))
 
-;; TODO: Multi-register loads (I128)
+
+;; Rules for `store*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; 8-, 16-, 32- and 64-bit GPR stores.
+(rule (lower (store flags
+                    value @ (value_type (is_gpr_type ty))
+                    address
+                    offset))
+      (side_effect
+       (x64_movrm ty (to_amode flags address offset) value)))
+
+;; Explicit 8/16/32-bit opcodes.
+(rule (lower (istore8 flags value address offset))
+      (side_effect
+       (x64_movrm $I8 (to_amode flags address offset) value)))
+(rule (lower (istore16 flags value address offset))
+      (side_effect
+       (x64_movrm $I16 (to_amode flags address offset) value)))
+(rule (lower (istore32 flags value address offset))
+      (side_effect
+       (x64_movrm $I32 (to_amode flags address offset) value)))
+
+;; F32 stores of values in XMM registers.
+(rule (lower (store flags
+                    value @ (value_type $F32)
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movss) (to_amode flags address offset) value)))
+
+;; F64 stores of values in XMM registers.
+(rule (lower (store flags
+                    value @ (value_type $F64)
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movsd) (to_amode flags address offset) value)))
+
+;; Stores of F32X4 vectors.
+(rule (lower (store flags
+                    value @ (value_type $F32X4)
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movups) (to_amode flags address offset) value)))
+
+;; Stores of F64X2 vectors.
+(rule (lower (store flags
+                    value @ (value_type $F64X2)
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movupd) (to_amode flags address offset) value)))
+
+;; Stores of all other 128-bit vector types with integer lanes.
+(rule (lower (store flags
+                    value @ (value_type (ty_vec128_int _))
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movdqu) (to_amode flags address offset) value)))
+
+;; Stores of I128/B128 values: store the two 64-bit halves separately.
+(rule (lower (store flags
+                    value @ (value_type (ty_int_bool_128 _))
+                    address
+                    offset))
+      (let ((value_reg ValueRegs value)
+            (value_lo Gpr (value_regs_get_gpr value_reg 0))
+            (value_hi Gpr (value_regs_get_gpr value_reg 1))
+            (addr_lo Amode (to_amode flags address offset))
+            (addr_hi Amode (amode_offset addr_lo 8)))
+        (side_effect
+         (side_effect_concat
+          (x64_movrm $I64 addr_lo value_lo)
+          (x64_movrm $I64 addr_hi value_hi)))))
+
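The I128/B128 store mirrors the load rule: split the value into its two 64-bit registers and emit two adjacent 64-bit stores, with `side_effect_concat` keeping both stores in the emitted instruction stream (low half at the base address, high half at +8). A standalone sketch of the byte-level effect:

```rust
/// Store a 128-bit value as two 64-bit halves at adjacent addresses,
/// mirroring the two `mov [mem], reg` instructions emitted by the rule above.
fn store_u128(mem: &mut [u8; 16], value: u128) {
    mem[0..8].copy_from_slice(&(value as u64).to_le_bytes()); // addr_lo
    mem[8..16].copy_from_slice(&((value >> 64) as u64).to_le_bytes()); // addr_hi
}
```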
@@ -2146,75 +2146,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::Uload16x4
         | Opcode::Sload32x2
         | Opcode::Uload32x2 => {
-            let offset = ctx.data(insn).load_store_offset().unwrap();
-
-            let elem_ty = match op {
-                Opcode::Sload8 | Opcode::Uload8 => types::I8,
-                Opcode::Sload16 | Opcode::Uload16 => types::I16,
-                Opcode::Sload32 | Opcode::Uload32 => types::I32,
-                Opcode::Sload8x8 | Opcode::Uload8x8 => types::I8X8,
-                Opcode::Sload16x4 | Opcode::Uload16x4 => types::I16X4,
-                Opcode::Sload32x2 | Opcode::Uload32x2 => types::I32X2,
-                Opcode::Load => ctx.output_ty(insn, 0),
-                _ => unimplemented!(),
-            };
-
-            let amode = match op {
-                Opcode::Load
-                | Opcode::Uload8
-                | Opcode::Sload8
-                | Opcode::Uload16
-                | Opcode::Sload16
-                | Opcode::Uload32
-                | Opcode::Sload32
-                | Opcode::Sload8x8
-                | Opcode::Uload8x8
-                | Opcode::Sload16x4
-                | Opcode::Uload16x4
-                | Opcode::Sload32x2
-                | Opcode::Uload32x2 => {
-                    assert_eq!(inputs.len(), 1, "only one input for load operands");
-                    lower_to_amode(ctx, inputs[0], offset)
-                }
-                _ => unreachable!(),
-            };
-
-            if elem_ty == types::I128 {
-                let dsts = get_output_reg(ctx, outputs[0]);
-                ctx.emit(Inst::mov64_m_r(amode.clone(), dsts.regs()[0]));
-                ctx.emit(Inst::mov64_m_r(amode.offset(8), dsts.regs()[1]));
-            } else {
-                implemented_in_isle(ctx);
-            }
+            implemented_in_isle(ctx);
         }
 
         Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
-            let offset = ctx.data(insn).load_store_offset().unwrap();
-
-            let elem_ty = match op {
-                Opcode::Istore8 => types::I8,
-                Opcode::Istore16 => types::I16,
-                Opcode::Istore32 => types::I32,
-                Opcode::Store => ctx.input_ty(insn, 0),
-                _ => unreachable!(),
-            };
-
-            let addr = match op {
-                Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
-                    assert_eq!(inputs.len(), 2, "only one input for store memory operands");
-                    lower_to_amode(ctx, inputs[1], offset)
-                }
-                _ => unreachable!(),
-            };
-
-            if elem_ty == types::I128 {
-                let srcs = put_input_in_regs(ctx, inputs[0]);
-                ctx.emit(Inst::store(types::I64, srcs.regs()[0], addr.clone()));
-                ctx.emit(Inst::store(types::I64, srcs.regs()[1], addr.offset(8)));
-            } else {
-                let src = put_input_in_reg(ctx, inputs[0]);
-                ctx.emit(Inst::store(elem_ty, src, addr));
-            }
+            implemented_in_isle(ctx);
         }
 
         Opcode::AtomicRmw => {
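With the I128 special cases gone, both match arms collapse to `implemented_in_isle(ctx)`, the marker used for opcodes whose lowering now lives entirely in ISLE. A sketch of what that marker amounts to (assumed shape, not the exact Cranelift definition): ISLE rules are consulted first, and since they now cover every type for these opcodes, reaching the handwritten arm would be a compiler bug.

```rust
// Assumed shape of the marker: if control reaches the handwritten arm, the
// ISLE rules (which run first) failed to match, which should be impossible.
fn implemented_in_isle<C>(_ctx: &mut C) -> ! {
    unreachable!("opcode should have been handled by an ISLE lowering rule")
}
```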
@@ -544,6 +544,11 @@ where
             None
         }
     }
+
+    #[inline]
+    fn amode_offset(&mut self, addr: &Amode, offset: u32) -> Amode {
+        addr.offset(offset)
+    }
 }
 
 // Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
@@ -1,4 +1,4 @@
 src/clif.isle 443b34b797fc8ace
-src/prelude.isle afd037c4d91c875c
-src/isa/x64/inst.isle cad03431447aca1b
-src/isa/x64/lower.isle a7181571835ddd1e
+src/prelude.isle d8a93eb727abd7f4
+src/isa/x64/inst.isle 2fa48b8183f9d5cb
+src/isa/x64/lower.isle b7fe1c95c21edbe4
File diff suppressed because it is too large.