x64 backend: migrate stores, and remainder of loads (I128 case), to ISLE. (#4069)

Chris Fallin
2022-04-26 09:50:46 -07:00
committed by GitHub
parent f384938a10
commit 164bfeaf7e
12 changed files with 792 additions and 406 deletions

View File

@@ -838,6 +838,11 @@
 
 (rule (to_amode flags base offset)
       (amode_imm_reg_flags offset (put_in_gpr base) flags))
 
+;; Offsetting an Amode. Used when we need to do consecutive
+;; loads/stores to adjacent addresses.
+(decl amode_offset (Amode u32) Amode)
+(extern constructor amode_offset amode_offset)
+
 ;; Shift kinds.
 (type ShiftKind extern
@@ -1404,6 +1409,15 @@
 (rule (x64_pmovzxdq from)
       (xmm_unary_rm_r (SseOpcode.Pmovzxdq) from))
 
+(decl x64_movrm (Type SyntheticAmode Gpr) SideEffectNoResult)
+(rule (x64_movrm ty addr data)
+      (let ((size OperandSize (raw_operand_size_of_type ty)))
+        (SideEffectNoResult.Inst (MInst.MovRM size data addr))))
+
+(decl x64_xmm_movrm (SseOpcode SyntheticAmode Xmm) SideEffectNoResult)
+(rule (x64_xmm_movrm op addr data)
+      (SideEffectNoResult.Inst (MInst.XmmMovRM op data addr)))
+
 ;; Load a constant into an XMM register.
 (decl x64_xmm_load_const (Type VCodeConstant) Xmm)
 (rule (x64_xmm_load_const ty const)
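
Both new helpers return a `SideEffectNoResult`: an instruction that writes memory but produces no register value. Lowering rules wrap one with `side_effect`, or chain several with `side_effect_concat`, to emit them in order. As a rough intuition only, here is a toy Rust model of that pattern; the names mirror the ISLE constructors, but none of this is Cranelift's actual API:

// Toy model of the `SideEffectNoResult` pattern (illustrative only; these
// are not Cranelift's real types).

/// A pseudo-instruction, here just a printable description.
#[derive(Debug)]
struct Inst(String);

/// One or more instructions whose only effect is on memory: they produce no
/// register value, so lowering passes around a "side effect" token instead.
struct SideEffectNoResult(Vec<Inst>);

/// Model of `side_effect_concat`: sequence two side effects into one.
fn side_effect_concat(a: SideEffectNoResult, b: SideEffectNoResult) -> SideEffectNoResult {
    let mut insts = a.0;
    insts.extend(b.0);
    SideEffectNoResult(insts)
}

/// Model of `side_effect`: emit the collected instructions in order.
fn side_effect(effect: SideEffectNoResult) {
    for inst in effect.0 {
        println!("emit: {:?}", inst);
    }
}

fn main() {
    // Two 64-bit stores to adjacent addresses, as in the I128 store rule below.
    let lo = SideEffectNoResult(vec![Inst("mov [rax], rcx".to_string())]);
    let hi = SideEffectNoResult(vec![Inst("mov [rax+8], rdx".to_string())]);
    side_effect(side_effect_concat(lo, hi));
}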

View File

@@ -2554,6 +2554,15 @@
 (rule (lower (has_type (ty_vec128 ty) (load flags address offset)))
       (x64_movdqu (to_amode flags address offset)))
 
+;; We can load an I128/B128 by doing two 64-bit loads.
+(rule (lower (has_type (ty_int_bool_128 _)
+                       (load flags address offset)))
+      (let ((addr_lo Amode (to_amode flags address offset))
+            (addr_hi Amode (amode_offset addr_lo 8))
+            (value_lo Reg (x64_mov addr_lo))
+            (value_hi Reg (x64_mov addr_hi)))
+        (value_regs value_lo value_hi)))
+
 ;; We also include widening vector loads; these sign- or zero-extend each lane
 ;; to the next wider width (e.g., 16x4 -> 32x4).
 (rule (lower (has_type $I16X8 (sload8x8 flags address offset)))
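
As the comment above notes, the widening loads sign- or zero-extend each lane to the next wider width. A scalar Rust sketch of the `uload16x4`/`sload16x4` semantics (illustrative only; the backend emits a single PMOVZXWD/PMOVSXWD rather than a lane loop):

// Scalar model of a widening vector load: read four 16-bit lanes and
// zero-extend (uload16x4) or sign-extend (sload16x4) each to 32 bits.
fn uload16x4(mem: &[u8], addr: usize) -> [u32; 4] {
    core::array::from_fn(|i| {
        let off = addr + 2 * i;
        u16::from_le_bytes([mem[off], mem[off + 1]]) as u32
    })
}

fn sload16x4(mem: &[u8], addr: usize) -> [i32; 4] {
    core::array::from_fn(|i| {
        let off = addr + 2 * i;
        i16::from_le_bytes([mem[off], mem[off + 1]]) as i32
    })
}

fn main() {
    let mem = [0xff, 0xff, 0x01, 0x00, 0x00, 0x80, 0x34, 0x12];
    assert_eq!(uload16x4(&mem, 0), [0xffff, 0x0001, 0x8000, 0x1234]);
    assert_eq!(sload16x4(&mem, 0), [-1, 1, -32768, 0x1234]);
    println!("ok");
}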
@@ -2570,3 +2579,79 @@
       (x64_pmovzxdq (to_amode flags address offset)))
 
-;; TODO: Multi-register loads (I128)
+;; Rules for `store*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; 8-, 16-, 32- and 64-bit GPR stores.
+(rule (lower (store flags
+                    value @ (value_type (is_gpr_type ty))
+                    address
+                    offset))
+      (side_effect
+       (x64_movrm ty (to_amode flags address offset) value)))
+
+;; Explicit 8/16/32-bit opcodes.
+(rule (lower (istore8 flags value address offset))
+      (side_effect
+       (x64_movrm $I8 (to_amode flags address offset) value)))
+
+(rule (lower (istore16 flags value address offset))
+      (side_effect
+       (x64_movrm $I16 (to_amode flags address offset) value)))
+
+(rule (lower (istore32 flags value address offset))
+      (side_effect
+       (x64_movrm $I32 (to_amode flags address offset) value)))
+
+;; F32 stores of values in XMM registers.
+(rule (lower (store flags
+                    value @ (value_type $F32)
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movss) (to_amode flags address offset) value)))
+
+;; F64 stores of values in XMM registers.
+(rule (lower (store flags
+                    value @ (value_type $F64)
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movsd) (to_amode flags address offset) value)))
+
+;; Stores of F32X4 vectors.
+(rule (lower (store flags
+                    value @ (value_type $F32X4)
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movups) (to_amode flags address offset) value)))
+
+;; Stores of F64X2 vectors.
+(rule (lower (store flags
+                    value @ (value_type $F64X2)
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movupd) (to_amode flags address offset) value)))
+
+;; Stores of all other 128-bit vector types with integer lanes.
+(rule (lower (store flags
+                    value @ (value_type (ty_vec128_int _))
+                    address
+                    offset))
+      (side_effect
+       (x64_xmm_movrm (SseOpcode.Movdqu) (to_amode flags address offset) value)))
+
+;; Stores of I128/B128 values: store the two 64-bit halves separately.
+(rule (lower (store flags
+                    value @ (value_type (ty_int_bool_128 _))
+                    address
+                    offset))
+      (let ((value_reg ValueRegs value)
+            (value_lo Gpr (value_regs_get_gpr value_reg 0))
+            (value_hi Gpr (value_regs_get_gpr value_reg 1))
+            (addr_lo Amode (to_amode flags address offset))
+            (addr_hi Amode (amode_offset addr_lo 8)))
+        (side_effect
+         (side_effect_concat
+          (x64_movrm $I64 addr_lo value_lo)
+          (x64_movrm $I64 addr_hi value_hi)))))
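
Because x64 is little-endian, the I128/B128 load and store rules above agree on one layout: the low 64 bits live at the base address and the high 64 bits at base + 8. A self-contained Rust sketch of that split against a plain byte buffer (a model of the lowering, not Cranelift code):

// Little-endian split of a 128-bit value into two 64-bit halves.
fn store_i128(mem: &mut [u8], addr: usize, value: u128) {
    let lo = value as u64;          // low half  -> addr
    let hi = (value >> 64) as u64;  // high half -> addr + 8
    mem[addr..addr + 8].copy_from_slice(&lo.to_le_bytes());
    mem[addr + 8..addr + 16].copy_from_slice(&hi.to_le_bytes());
}

fn load_i128(mem: &[u8], addr: usize) -> u128 {
    let mut lo = [0u8; 8];
    let mut hi = [0u8; 8];
    lo.copy_from_slice(&mem[addr..addr + 8]);
    hi.copy_from_slice(&mem[addr + 8..addr + 16]);
    (u64::from_le_bytes(lo) as u128) | ((u64::from_le_bytes(hi) as u128) << 64)
}

fn main() {
    let mut mem = vec![0u8; 32];
    let v: u128 = 0x1122_3344_5566_7788_99aa_bbcc_ddee_ff00;
    store_i128(&mut mem, 8, v);
    assert_eq!(load_i128(&mem, 8), v);
    println!("round-trips: {:#x}", v);
}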

View File

@@ -2146,75 +2146,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::Uload16x4
         | Opcode::Sload32x2
         | Opcode::Uload32x2 => {
-            let offset = ctx.data(insn).load_store_offset().unwrap();
-
-            let elem_ty = match op {
-                Opcode::Sload8 | Opcode::Uload8 => types::I8,
-                Opcode::Sload16 | Opcode::Uload16 => types::I16,
-                Opcode::Sload32 | Opcode::Uload32 => types::I32,
-                Opcode::Sload8x8 | Opcode::Uload8x8 => types::I8X8,
-                Opcode::Sload16x4 | Opcode::Uload16x4 => types::I16X4,
-                Opcode::Sload32x2 | Opcode::Uload32x2 => types::I32X2,
-                Opcode::Load => ctx.output_ty(insn, 0),
-                _ => unimplemented!(),
-            };
-
-            let amode = match op {
-                Opcode::Load
-                | Opcode::Uload8
-                | Opcode::Sload8
-                | Opcode::Uload16
-                | Opcode::Sload16
-                | Opcode::Uload32
-                | Opcode::Sload32
-                | Opcode::Sload8x8
-                | Opcode::Uload8x8
-                | Opcode::Sload16x4
-                | Opcode::Uload16x4
-                | Opcode::Sload32x2
-                | Opcode::Uload32x2 => {
-                    assert_eq!(inputs.len(), 1, "only one input for load operands");
-                    lower_to_amode(ctx, inputs[0], offset)
-                }
-                _ => unreachable!(),
-            };
-
-            if elem_ty == types::I128 {
-                let dsts = get_output_reg(ctx, outputs[0]);
-                ctx.emit(Inst::mov64_m_r(amode.clone(), dsts.regs()[0]));
-                ctx.emit(Inst::mov64_m_r(amode.offset(8), dsts.regs()[1]));
-            } else {
-                implemented_in_isle(ctx);
-            }
+            implemented_in_isle(ctx);
         }
 
         Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
-            let offset = ctx.data(insn).load_store_offset().unwrap();
-
-            let elem_ty = match op {
-                Opcode::Istore8 => types::I8,
-                Opcode::Istore16 => types::I16,
-                Opcode::Istore32 => types::I32,
-                Opcode::Store => ctx.input_ty(insn, 0),
-                _ => unreachable!(),
-            };
-
-            let addr = match op {
-                Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
-                    assert_eq!(inputs.len(), 2, "only one input for store memory operands");
-                    lower_to_amode(ctx, inputs[1], offset)
-                }
-                _ => unreachable!(),
-            };
-
-            if elem_ty == types::I128 {
-                let srcs = put_input_in_regs(ctx, inputs[0]);
-                ctx.emit(Inst::store(types::I64, srcs.regs()[0], addr.clone()));
-                ctx.emit(Inst::store(types::I64, srcs.regs()[1], addr.offset(8)));
-            } else {
-                let src = put_input_in_reg(ctx, inputs[0]);
-                ctx.emit(Inst::store(elem_ty, src, addr));
-            }
+            implemented_in_isle(ctx);
         }
 
         Opcode::AtomicRmw => {

View File

@@ -544,6 +544,11 @@ where
             None
         }
     }
+
+    #[inline]
+    fn amode_offset(&mut self, addr: &Amode, offset: u32) -> Amode {
+        addr.offset(offset)
+    }
 }
 
 // Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we
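
This external constructor backs the `amode_offset` helper declared in `inst.isle`; it simply rebases the addressing mode by a byte offset via `Amode::offset`. A simplified, self-contained model of that operation (the struct here is hypothetical; the real `Amode` has several addressing forms):

// Hypothetical, simplified Amode: only displacement + base register.
#[derive(Clone, Debug, PartialEq)]
struct Amode {
    simm32: u32, // 32-bit displacement
    base: u8,    // base register number
}

impl Amode {
    /// Rebase the addressing mode by `offset` bytes; this is what lets the
    /// I128 rules address the high half of a value at base + 8.
    fn offset(&self, offset: u32) -> Amode {
        Amode {
            simm32: self.simm32.wrapping_add(offset),
            base: self.base,
        }
    }
}

fn main() {
    let addr_lo = Amode { simm32: 16, base: 5 };
    let addr_hi = addr_lo.offset(8);
    assert_eq!(addr_hi, Amode { simm32: 24, base: 5 });
    println!("{:?} -> {:?}", addr_lo, addr_hi);
}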

View File

@@ -1,4 +1,4 @@
 src/clif.isle 443b34b797fc8ace
-src/prelude.isle afd037c4d91c875c
-src/isa/x64/inst.isle cad03431447aca1b
-src/isa/x64/lower.isle a7181571835ddd1e
+src/prelude.isle d8a93eb727abd7f4
+src/isa/x64/inst.isle 2fa48b8183f9d5cb
+src/isa/x64/lower.isle b7fe1c95c21edbe4

File diff suppressed because it is too large