x64: port load to ISLE (#3993)

This change moves the majority of the lowerings for CLIF's `load`
instruction over to ISLE. To do so, it also migrates the previous
mechanism for creating an `Amode` (`lower_to_amode`) to several ISLE
rules (see `to_amode`).
Andrew Brown
2022-04-07 18:31:22 -07:00
committed by GitHub
parent 76f7cde673
commit f62199da8c
12 changed files with 1726 additions and 806 deletions


@@ -1,4 +1,4 @@
src/clif.isle 443b34b797fc8ace
src/prelude.isle 74d9514ac948e163
src/prelude.isle c0751050a11e2686
src/isa/aarch64/inst.isle 19ccefb6a496d392
src/isa/aarch64/lower.isle d88b62dd6b40622


@@ -55,6 +55,7 @@ pub trait Context {
fn ty_32_or_64(&mut self, arg0: Type) -> Option<Type>;
fn ty_8_or_16(&mut self, arg0: Type) -> Option<Type>;
fn ty_int_bool_64(&mut self, arg0: Type) -> Option<Type>;
fn ty_int_bool_ref_64(&mut self, arg0: Type) -> Option<Type>;
fn ty_int_bool_128(&mut self, arg0: Type) -> Option<Type>;
fn ty_scalar_float(&mut self, arg0: Type) -> Option<Type>;
fn ty_vec128(&mut self, arg0: Type) -> Option<Type>;
@@ -76,6 +77,7 @@ pub trait Context {
fn value_type(&mut self, arg0: Value) -> Type;
fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>;
fn def_inst(&mut self, arg0: Value) -> Option<Inst>;
fn offset32_to_u32(&mut self, arg0: Offset32) -> u32;
fn emit(&mut self, arg0: &MInst) -> Unit;
fn emit_safepoint(&mut self, arg0: &MInst) -> Unit;
fn trap_code_division_by_zero(&mut self) -> TrapCode;
@@ -130,13 +132,13 @@ pub trait Context {
fn rotr_opposite_amount(&mut self, arg0: Type, arg1: ImmShift) -> ImmShift;
}
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 397.
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 405.
#[derive(Clone, Debug)]
pub enum SideEffectNoResult {
Inst { inst: MInst },
}
/// Internal type ProducesFlags: defined at src/prelude.isle line 419.
/// Internal type ProducesFlags: defined at src/prelude.isle line 427.
#[derive(Clone, Debug)]
pub enum ProducesFlags {
ProducesFlagsSideEffect { inst: MInst },
@@ -144,7 +146,7 @@ pub enum ProducesFlags {
ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg },
}
/// Internal type ConsumesFlags: defined at src/prelude.isle line 430.
/// Internal type ConsumesFlags: defined at src/prelude.isle line 438.
#[derive(Clone, Debug)]
pub enum ConsumesFlags {
ConsumesFlagsReturnsResultWithProducer {
@@ -1086,7 +1088,7 @@ pub fn constructor_side_effect<C: Context>(
inst: ref pattern1_0,
} = pattern0_0
{
// Rule at src/prelude.isle line 402.
// Rule at src/prelude.isle line 410.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx);
return Some(expr1_0);
@@ -1104,7 +1106,7 @@ pub fn constructor_safepoint<C: Context>(
inst: ref pattern1_0,
} = pattern0_0
{
// Rule at src/prelude.isle line 408.
// Rule at src/prelude.isle line 416.
let expr0_0 = C::emit_safepoint(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx);
return Some(expr1_0);
@@ -1123,7 +1125,7 @@ pub fn constructor_produces_flags_get_reg<C: Context>(
result: pattern1_1,
} = pattern0_0
{
// Rule at src/prelude.isle line 446.
// Rule at src/prelude.isle line 454.
return Some(pattern1_1);
}
return None;
@@ -1140,7 +1142,7 @@ pub fn constructor_produces_flags_ignore<C: Context>(
inst: ref pattern1_0,
result: pattern1_1,
} => {
// Rule at src/prelude.isle line 451.
// Rule at src/prelude.isle line 459.
let expr0_0 = ProducesFlags::ProducesFlagsSideEffect {
inst: pattern1_0.clone(),
};
@@ -1150,7 +1152,7 @@ pub fn constructor_produces_flags_ignore<C: Context>(
inst: ref pattern1_0,
result: pattern1_1,
} => {
// Rule at src/prelude.isle line 453.
// Rule at src/prelude.isle line 461.
let expr0_0 = ProducesFlags::ProducesFlagsSideEffect {
inst: pattern1_0.clone(),
};
@@ -1179,7 +1181,7 @@ pub fn constructor_consumes_flags_concat<C: Context>(
result: pattern3_1,
} = pattern2_0
{
// Rule at src/prelude.isle line 460.
// Rule at src/prelude.isle line 468.
let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs {
inst1: pattern1_0.clone(),
@@ -1209,7 +1211,7 @@ pub fn constructor_with_flags<C: Context>(
inst: ref pattern3_0,
result: pattern3_1,
} => {
// Rule at src/prelude.isle line 485.
// Rule at src/prelude.isle line 493.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_reg(ctx, pattern3_1);
@@ -1220,7 +1222,7 @@ pub fn constructor_with_flags<C: Context>(
inst2: ref pattern3_1,
result: pattern3_2,
} => {
// Rule at src/prelude.isle line 491.
// Rule at src/prelude.isle line 499.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
@@ -1233,7 +1235,7 @@ pub fn constructor_with_flags<C: Context>(
inst4: ref pattern3_3,
result: pattern3_4,
} => {
// Rule at src/prelude.isle line 503.
// Rule at src/prelude.isle line 511.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
@@ -1254,7 +1256,7 @@ pub fn constructor_with_flags<C: Context>(
result: pattern3_1,
} = pattern2_0
{
// Rule at src/prelude.isle line 479.
// Rule at src/prelude.isle line 487.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1274,7 +1276,7 @@ pub fn constructor_with_flags_reg<C: Context>(
) -> Option<Reg> {
let pattern0_0 = arg0;
let pattern1_0 = arg1;
// Rule at src/prelude.isle line 520.
// Rule at src/prelude.isle line 528.
let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?;
let expr1_0: usize = 0;
let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);


@@ -1,4 +1,4 @@
src/clif.isle 443b34b797fc8ace
src/prelude.isle 74d9514ac948e163
src/prelude.isle c0751050a11e2686
src/isa/s390x/inst.isle d91a16074ab186a8
src/isa/s390x/lower.isle 1cc5a12adc8c75f9


@@ -55,6 +55,7 @@ pub trait Context {
fn ty_32_or_64(&mut self, arg0: Type) -> Option<Type>;
fn ty_8_or_16(&mut self, arg0: Type) -> Option<Type>;
fn ty_int_bool_64(&mut self, arg0: Type) -> Option<Type>;
fn ty_int_bool_ref_64(&mut self, arg0: Type) -> Option<Type>;
fn ty_int_bool_128(&mut self, arg0: Type) -> Option<Type>;
fn ty_scalar_float(&mut self, arg0: Type) -> Option<Type>;
fn ty_vec128(&mut self, arg0: Type) -> Option<Type>;
@@ -76,6 +77,7 @@ pub trait Context {
fn value_type(&mut self, arg0: Value) -> Type;
fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>;
fn def_inst(&mut self, arg0: Value) -> Option<Inst>;
fn offset32_to_u32(&mut self, arg0: Offset32) -> u32;
fn emit(&mut self, arg0: &MInst) -> Unit;
fn emit_safepoint(&mut self, arg0: &MInst) -> Unit;
fn trap_code_division_by_zero(&mut self) -> TrapCode;
@@ -153,13 +155,13 @@ pub trait Context {
fn same_reg(&mut self, arg0: Reg, arg1: WritableReg) -> Option<()>;
}
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 397.
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 405.
#[derive(Clone, Debug)]
pub enum SideEffectNoResult {
Inst { inst: MInst },
}
/// Internal type ProducesFlags: defined at src/prelude.isle line 419.
/// Internal type ProducesFlags: defined at src/prelude.isle line 427.
#[derive(Clone, Debug)]
pub enum ProducesFlags {
ProducesFlagsSideEffect { inst: MInst },
@@ -167,7 +169,7 @@ pub enum ProducesFlags {
ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg },
}
/// Internal type ConsumesFlags: defined at src/prelude.isle line 430.
/// Internal type ConsumesFlags: defined at src/prelude.isle line 438.
#[derive(Clone, Debug)]
pub enum ConsumesFlags {
ConsumesFlagsReturnsResultWithProducer {
@@ -957,7 +959,7 @@ pub fn constructor_side_effect<C: Context>(
inst: ref pattern1_0,
} = pattern0_0
{
// Rule at src/prelude.isle line 402.
// Rule at src/prelude.isle line 410.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx);
return Some(expr1_0);
@@ -975,7 +977,7 @@ pub fn constructor_safepoint<C: Context>(
inst: ref pattern1_0,
} = pattern0_0
{
// Rule at src/prelude.isle line 408.
// Rule at src/prelude.isle line 416.
let expr0_0 = C::emit_safepoint(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx);
return Some(expr1_0);
@@ -994,7 +996,7 @@ pub fn constructor_produces_flags_get_reg<C: Context>(
result: pattern1_1,
} = pattern0_0
{
// Rule at src/prelude.isle line 446.
// Rule at src/prelude.isle line 454.
return Some(pattern1_1);
}
return None;
@@ -1011,7 +1013,7 @@ pub fn constructor_produces_flags_ignore<C: Context>(
inst: ref pattern1_0,
result: pattern1_1,
} => {
// Rule at src/prelude.isle line 451.
// Rule at src/prelude.isle line 459.
let expr0_0 = ProducesFlags::ProducesFlagsSideEffect {
inst: pattern1_0.clone(),
};
@@ -1021,7 +1023,7 @@ pub fn constructor_produces_flags_ignore<C: Context>(
inst: ref pattern1_0,
result: pattern1_1,
} => {
// Rule at src/prelude.isle line 453.
// Rule at src/prelude.isle line 461.
let expr0_0 = ProducesFlags::ProducesFlagsSideEffect {
inst: pattern1_0.clone(),
};
@@ -1050,7 +1052,7 @@ pub fn constructor_consumes_flags_concat<C: Context>(
result: pattern3_1,
} = pattern2_0
{
// Rule at src/prelude.isle line 460.
// Rule at src/prelude.isle line 468.
let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs {
inst1: pattern1_0.clone(),
@@ -1080,7 +1082,7 @@ pub fn constructor_with_flags<C: Context>(
inst: ref pattern3_0,
result: pattern3_1,
} => {
// Rule at src/prelude.isle line 485.
// Rule at src/prelude.isle line 493.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_reg(ctx, pattern3_1);
@@ -1091,7 +1093,7 @@ pub fn constructor_with_flags<C: Context>(
inst2: ref pattern3_1,
result: pattern3_2,
} => {
// Rule at src/prelude.isle line 491.
// Rule at src/prelude.isle line 499.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
@@ -1104,7 +1106,7 @@ pub fn constructor_with_flags<C: Context>(
inst4: ref pattern3_3,
result: pattern3_4,
} => {
// Rule at src/prelude.isle line 503.
// Rule at src/prelude.isle line 511.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
@@ -1125,7 +1127,7 @@ pub fn constructor_with_flags<C: Context>(
result: pattern3_1,
} = pattern2_0
{
// Rule at src/prelude.isle line 479.
// Rule at src/prelude.isle line 487.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1145,7 +1147,7 @@ pub fn constructor_with_flags_reg<C: Context>(
) -> Option<Reg> {
let pattern0_0 = arg0;
let pattern1_0 = arg1;
// Rule at src/prelude.isle line 520.
// Rule at src/prelude.isle line 528.
let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?;
let expr1_0: usize = 0;
let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);


@@ -748,18 +748,96 @@
(decl put_in_reg_mem (Value) RegMem)
(extern constructor put_in_reg_mem put_in_reg_mem)
;; Addressing modes.
(type SyntheticAmode extern (enum))
(decl synthetic_amode_to_reg_mem (SyntheticAmode) RegMem)
(extern constructor synthetic_amode_to_reg_mem synthetic_amode_to_reg_mem)
(decl amode_to_synthetic_amode (Amode) SyntheticAmode)
(extern constructor amode_to_synthetic_amode amode_to_synthetic_amode)
(type Amode extern (enum))
(decl amode_with_flags (Amode MemFlags) Amode)
(extern constructor amode_with_flags amode_with_flags)
(decl amode_imm_reg (u32 Gpr) Amode)
(extern constructor amode_imm_reg amode_imm_reg)
(decl amode_imm_reg_flags (u32 Gpr MemFlags) Amode)
(rule (amode_imm_reg_flags offset base flags)
(amode_with_flags (amode_imm_reg offset base) flags))
(decl amode_imm_reg_reg_shift (u32 Gpr Gpr u8) Amode)
(extern constructor amode_imm_reg_reg_shift amode_imm_reg_reg_shift)
(decl amode_to_synthetic_amode (Amode) SyntheticAmode)
(extern constructor amode_to_synthetic_amode amode_to_synthetic_amode)
(decl amode_imm_reg_reg_shift_flags (u32 Gpr Gpr u8 MemFlags) Amode)
(rule (amode_imm_reg_reg_shift_flags offset base index shift flags)
(amode_with_flags (amode_imm_reg_reg_shift offset base index shift) flags))
;; A helper to check if a shift amount (the `Value`) is both constant and
;; less than or equal to 3; this is needed since x64 addressing modes can only
;; encode shift amounts of 0-3 (two bits in the SIB byte).
(decl const_shift_lt_eq_3 (u8) Value)
(extern extractor const_shift_lt_eq_3 const_shift_lt_eq_3)
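The limit of 3 exists because the SIB byte's scale field is only two bits wide, so an index register can be scaled by at most 1, 2, 4, or 8. A tiny illustration with a hypothetical helper, not part of the backend:

    // The SIB byte's 2-bit scale field encodes a multiplier of 1, 2, 4, or 8,
    // i.e. a left shift of at most 3.
    fn sib_scale(shift: u8) -> Option<u8> {
        if shift <= 3 {
            Some(1u8 << shift) // e.g. sib_scale(3) == Some(8)
        } else {
            None
        }
    }
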
;; A helper to both check that the `Imm64` and `Offset32` values sum to a value
;; that fits in 32 bits AND return this sum as a `u32`. Also, the `Imm64` will be
;; zero-extended from `Type` up to 64 bits. This is useful for `to_amode`.
(decl sum_extend_fits_in_32_bits (Type Imm64 u32) Offset32)
(extern extractor sum_extend_fits_in_32_bits sum_extend_fits_in_32_bits (in in out))
;; To generate an address for a memory access, we can pattern-match various CLIF
;; sub-trees to x64's complex addressing modes (`Amode`). In pseudo-code:
;;
;; if address matches iadd(a, b):
;;   if either a or b:
;;     matches (ishl c with shift amount <= 3):
;;       amode(base + offset + (c << amount))
;;     matches (iconst c where c + offset will fit in 32 bits):
;;       amode(base + eval(c + offset))
;;     matches (uextend (iconst c) where c + offset will fit in 32 bits):
;;       amode(base + eval(c + offset))
;;   else:
;;     amode(a + offset + (b << 0))
;; else:
;;   amode(base + offset)
;;
;; The rules for `to_amode` correspond to a subset of the possible addressing
;; modes available by tweaking the SIB byte, the MOD bits, and the size of the
;; displacement (i.e., offset). More information is available in Intel's
;; Software Developer's Manual, volume 2, section 2.1.5, "Addressing-Mode
;; Encoding of ModR/M and SIB Bytes."
(decl to_amode (MemFlags Value Offset32) Amode)
;; ...matches (ishl c ...)
(rule (to_amode flags (iadd (ishl src (const_shift_lt_eq_3 amt)) base) offset)
(amode_imm_reg_reg_shift_flags offset (put_in_gpr base) (put_in_gpr src) amt flags))
(rule (to_amode flags (iadd base (ishl src (const_shift_lt_eq_3 amt))) offset)
(amode_imm_reg_reg_shift_flags offset (put_in_gpr base) (put_in_gpr src) amt flags))
;; ...matches (iconst c ...); note how this matching pattern uses an in-out
;; extractor to check that the offset and the constant value (`c`, the in
;; parameter), when summed, will fit into x64's 32-bit displacement, with the
;; sum returned as `sum` (the out parameter). The syntax for this could be
;; improved (TODO).
(rule (to_amode flags (iadd (iconst c) base) _offset @ (sum_extend_fits_in_32_bits <$I64 <c sum))
(amode_imm_reg_flags sum (put_in_gpr base) flags))
(rule (to_amode flags (iadd base (iconst c)) _offset @ (sum_extend_fits_in_32_bits <$I64 <c sum))
(amode_imm_reg_flags sum (put_in_gpr base) flags))
;; ...matches (uextend(iconst c) ...); see notes above.
(rule (to_amode flags (iadd (has_type ty (uextend (iconst c))) base) _offset @ (sum_extend_fits_in_32_bits <ty <c sum))
(amode_imm_reg_flags sum (put_in_gpr base) flags))
(rule (to_amode flags (iadd base (has_type ty (uextend (iconst c)))) _offset @ (sum_extend_fits_in_32_bits <ty <c sum))
(amode_imm_reg_flags sum (put_in_gpr base) flags))
;; ...else only matches (iadd(a b))
(rule (to_amode flags (iadd base index) offset)
(amode_imm_reg_reg_shift_flags offset (put_in_gpr base) (put_in_gpr index) 0 flags))
;; ...else
(rule (to_amode flags base offset)
(amode_imm_reg_flags offset (put_in_gpr base) flags))
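For readers more at home in Rust than ISLE, the rules above correspond roughly to the sketch below. The `Addr` and `Amode` types and the `put_in_gpr` stub are simplified stand-ins, not the actual Cranelift API, and the constant-folding cases are omitted:

    // Simplified, hypothetical model of the `to_amode` rules above.
    enum Addr {
        Add(Box<Addr>, Box<Addr>),
        ShlConst(Box<Addr>, u8), // a value shifted left by a constant amount
        Other,                   // anything else gets lowered into a register
    }

    enum Amode {
        ImmReg { simm32: u32, base: u32 },
        ImmRegRegShift { simm32: u32, base: u32, index: u32, shift: u8 },
    }

    // Stand-in for `put_in_gpr`: lower a sub-expression into some register number.
    fn put_in_gpr(_a: &Addr) -> u32 {
        0
    }

    fn to_amode(addr: &Addr, offset: u32) -> Amode {
        if let Addr::Add(a, b) = addr {
            // If either operand is a shift by a constant <= 3, use it as the index.
            for (index, base) in [(a, b), (b, a)] {
                if let Addr::ShlConst(src, amt) = &**index {
                    if *amt <= 3 {
                        return Amode::ImmRegRegShift {
                            simm32: offset,
                            base: put_in_gpr(base),
                            index: put_in_gpr(src),
                            shift: *amt,
                        };
                    }
                }
            }
            // Otherwise use both operands, with a shift of zero.
            return Amode::ImmRegRegShift {
                simm32: offset,
                base: put_in_gpr(a),
                index: put_in_gpr(b),
                shift: 0,
            };
        }
        // Fallback: a single base register plus the 32-bit displacement.
        Amode::ImmReg {
            simm32: offset,
            base: put_in_gpr(addr),
        }
    }

The real rules additionally fold `iconst` and `uextend (iconst ...)` operands into the 32-bit displacement, as checked by `sum_extend_fits_in_32_bits`.
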
;; Shift kinds.
(type ShiftKind extern
(enum ShiftLeft
@@ -1152,11 +1230,11 @@
;; Zero extending uses `movzx`.
(rule (extend (ExtendKind.Zero) ty mode src)
(x64_movzx ty mode src))
(x64_movzx mode src))
;; Sign extending uses `movsx`.
(rule (extend (ExtendKind.Sign) ty mode src)
(x64_movsx ty mode src))
(x64_movsx mode src))
;;;; Helpers for Working with SSE Tidbits ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1235,8 +1313,7 @@
(decl x64_load (Type SyntheticAmode ExtKind) Reg)
(rule (x64_load (fits_in_32 ty) addr (ExtKind.SignExtend))
(x64_movsx ty
(ext_mode (ty_bytes ty) 8)
(x64_movsx (ext_mode (ty_bytes ty) 8)
addr))
(rule (x64_load $I64 addr _ext_kind)
@@ -1264,6 +1341,68 @@
(xmm_unary_rm_r (SseOpcode.Movdqu)
addr))
(decl x64_mov (Amode) Reg)
(rule (x64_mov addr)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.Mov64MR addr dst))))
dst))
(decl x64_movzx (ExtMode GprMem) Gpr)
(rule (x64_movzx mode src)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.MovzxRmR mode src dst))))
dst))
(decl x64_movsx (ExtMode GprMem) Gpr)
(rule (x64_movsx mode src)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.MovsxRmR mode src dst))))
dst))
(decl x64_movss_load (XmmMem) Xmm)
(rule (x64_movss_load from)
(xmm_unary_rm_r (SseOpcode.Movss) from))
(decl x64_movsd_load (XmmMem) Xmm)
(rule (x64_movsd_load from)
(xmm_unary_rm_r (SseOpcode.Movsd) from))
(decl x64_movups (XmmMem) Xmm)
(rule (x64_movups from)
(xmm_unary_rm_r (SseOpcode.Movups) from))
(decl x64_movupd (XmmMem) Xmm)
(rule (x64_movupd from)
(xmm_unary_rm_r (SseOpcode.Movupd) from))
(decl x64_movdqu (XmmMem) Xmm)
(rule (x64_movdqu from)
(xmm_unary_rm_r (SseOpcode.Movdqu) from))
(decl x64_pmovsxbw (XmmMem) Xmm)
(rule (x64_pmovsxbw from)
(xmm_unary_rm_r (SseOpcode.Pmovsxbw) from))
(decl x64_pmovzxbw (XmmMem) Xmm)
(rule (x64_pmovzxbw from)
(xmm_unary_rm_r (SseOpcode.Pmovzxbw) from))
(decl x64_pmovsxwd (XmmMem) Xmm)
(rule (x64_pmovsxwd from)
(xmm_unary_rm_r (SseOpcode.Pmovsxwd) from))
(decl x64_pmovzxwd (XmmMem) Xmm)
(rule (x64_pmovzxwd from)
(xmm_unary_rm_r (SseOpcode.Pmovzxwd) from))
(decl x64_pmovsxdq (XmmMem) Xmm)
(rule (x64_pmovsxdq from)
(xmm_unary_rm_r (SseOpcode.Pmovsxdq) from))
(decl x64_pmovzxdq (XmmMem) Xmm)
(rule (x64_pmovzxdq from)
(xmm_unary_rm_r (SseOpcode.Pmovzxdq) from))
;; Load a constant into an XMM register.
(decl x64_xmm_load_const (Type VCodeConstant) Xmm)
(rule (x64_xmm_load_const ty const)
@@ -1665,20 +1804,6 @@
(MInst.Setcc cc dst)
dst)))
;; Helper for creating `MInst.MovzxRmR` instructions.
(decl x64_movzx (Type ExtMode GprMem) Gpr)
(rule (x64_movzx ty mode src)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.MovzxRmR mode src dst))))
dst))
;; Helper for creating `MInst.MovsxRmR` instructions.
(decl x64_movsx (Type ExtMode GprMem) Gpr)
(rule (x64_movsx ty mode src)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.MovsxRmR mode src dst))))
dst))
;; Helper for creating `MInst.XmmRmR` instructions.
(decl xmm_rm_r (Type SseOpcode Xmm XmmMem) Xmm)
(rule (xmm_rm_r ty op src1 src2)
@@ -1995,8 +2120,8 @@
(xmm_rm_r $F64X2 (SseOpcode.Blendvpd) src1 src2)))
;; Helper for creating `movsd` instructions.
(decl x64_movsd (Xmm XmmMem) Xmm)
(rule (x64_movsd src1 src2)
(decl x64_movsd_regmove (Xmm XmmMem) Xmm)
(rule (x64_movsd_regmove src1 src2)
(xmm_rm_r $I8X16 (SseOpcode.Movsd) src1 src2))
;; Helper for creating `movlhps` instructions.
@@ -2191,16 +2316,6 @@
(_ Unit (emit (MInst.XmmUnaryRmR op src dst))))
dst))
;; Helper for creating `pmovsxbw` instructions.
(decl x64_pmovsxbw (XmmMem) Xmm)
(rule (x64_pmovsxbw src)
(xmm_unary_rm_r (SseOpcode.Pmovsxbw) src))
;; Helper for creating `pmovzxbw` instructions.
(decl x64_pmovzxbw (XmmMem) Xmm)
(rule (x64_pmovzxbw src)
(xmm_unary_rm_r (SseOpcode.Pmovzxbw) src))
;; Helper for creating `pabsb` instructions.
(decl x64_pabsb (XmmMem) Xmm)
(rule (x64_pabsb src)
@@ -2582,7 +2697,9 @@
(convert Imm8Reg Imm8Gpr imm8_reg_to_imm8_gpr)
(convert Amode SyntheticAmode amode_to_synthetic_amode)
(convert Amode GprMem amode_to_gpr_mem)
(convert SyntheticAmode GprMem synthetic_amode_to_gpr_mem)
(convert Amode XmmMem amode_to_xmm_mem)
(convert SyntheticAmode XmmMem synthetic_amode_to_xmm_mem)
(convert IntCC CC intcc_to_cc)
@@ -2614,8 +2731,14 @@
(value_reg w_xmm))
(decl synthetic_amode_to_gpr_mem (SyntheticAmode) GprMem)
(decl amode_to_gpr_mem (Amode) GprMem)
(rule (amode_to_gpr_mem amode)
(amode_to_synthetic_amode amode))
(rule (synthetic_amode_to_gpr_mem amode)
(synthetic_amode_to_reg_mem amode))
(decl amode_to_xmm_mem (Amode) XmmMem)
(rule (amode_to_xmm_mem amode)
(amode_to_synthetic_amode amode))
(decl synthetic_amode_to_xmm_mem (SyntheticAmode) XmmMem)
(rule (synthetic_amode_to_xmm_mem amode)
(synthetic_amode_to_reg_mem amode))


@@ -1349,10 +1349,9 @@
;; internally as `xmm_rm_r` will merge the temp register into our `vec`
;; register.
(rule (vec_insert_lane $F64X2 vec (RegMem.Reg val) 0)
(x64_movsd vec val))
(x64_movsd_regmove vec val))
(rule (vec_insert_lane $F64X2 vec mem 0)
(x64_movsd vec (xmm_unary_rm_r (SseOpcode.Movsd)
mem)))
(x64_movsd_regmove vec (x64_movsd_load mem)))
;; f64x2.replace_lane 1
;;
@@ -2506,3 +2505,65 @@
(x64_maxps y x))
(rule (lower (has_type $F64X2 (fmax_pseudo x y)))
(x64_maxpd y x))
;; Rules for `load*` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; In order to load a value from memory into a GPR, we may need to extend the
;; loaded value from 8, 16, or 32 bits to this backend's expected GPR width:
;; 64 bits. Note that `ext_mode` treats 1-bit types (booleans) as 8-bit loads.
;;
;; By default, we zero-extend all sub-64-bit loads to a GPR.
(rule (lower (has_type (and (fits_in_32 ty) (is_gpr_type _)) (load flags address offset)))
(x64_movzx (ext_mode (ty_bits_u16 ty) 64) (to_amode flags address offset)))
;; But if we know that both the `from` and `to` are 64 bits, we simply load with
;; no extension.
(rule (lower (has_type (ty_int_bool_ref_64 ty) (load flags address offset)))
(x64_mov (to_amode flags address offset)))
;; Also, certain scalar loads have a specific `from` width and extension kind
;; (signed -> `sx`, zeroed -> `zx`). We overwrite the high bits of the 64-bit
;; GPR even if the `to` type is smaller (e.g., 16 bits).
(rule (lower (has_type (is_gpr_type ty) (uload8 flags address offset)))
(x64_movzx (ExtMode.BQ) (to_amode flags address offset)))
(rule (lower (has_type (is_gpr_type ty) (sload8 flags address offset)))
(x64_movsx (ExtMode.BQ) (to_amode flags address offset)))
(rule (lower (has_type (is_gpr_type ty) (uload16 flags address offset)))
(x64_movzx (ExtMode.WQ) (to_amode flags address offset)))
(rule (lower (has_type (is_gpr_type ty) (sload16 flags address offset)))
(x64_movsx (ExtMode.WQ) (to_amode flags address offset)))
(rule (lower (has_type (is_gpr_type ty) (uload32 flags address offset)))
(x64_movzx (ExtMode.LQ) (to_amode flags address offset)))
(rule (lower (has_type (is_gpr_type ty) (sload32 flags address offset)))
(x64_movsx (ExtMode.LQ) (to_amode flags address offset)))
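The extend modes above name the source and destination widths (B/W/L to Q, i.e. byte/word/longword to quadword). A minimal sketch of that mapping, using a hypothetical helper that returns the mode's name rather than the backend's `ExtMode` enum:

    // Hypothetical helper: pick the extend mode for a scalar load widened to 64 bits.
    fn ext_mode_to_64(from_bits: u32) -> &'static str {
        match from_bits {
            1 | 8 => "BQ", // byte -> quadword (1-bit booleans are loaded as bytes)
            16 => "WQ",    // word -> quadword
            32 => "LQ",    // longword -> quadword
            _ => panic!("64-bit loads need no extension"),
        }
    }
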
;; To load to XMM registers, we use the x64-specific instructions for each type.
;; For `$F32` and `$F64` this is important: we only want to load 32 or 64 bits.
;; For the 128-bit types, the specific choice is not necessary for performance,
;; but it may help with clarity when reading disassembly.
(rule (lower (has_type $F32 (load flags address offset)))
(x64_movss_load (to_amode flags address offset)))
(rule (lower (has_type $F64 (load flags address offset)))
(x64_movsd_load (to_amode flags address offset)))
(rule (lower (has_type $F32X4 (load flags address offset)))
(x64_movups (to_amode flags address offset)))
(rule (lower (has_type $F64X2 (load flags address offset)))
(x64_movupd (to_amode flags address offset)))
(rule (lower (has_type (ty_vec128 ty) (load flags address offset)))
(x64_movdqu (to_amode flags address offset)))
;; We also include widening vector loads; these sign- or zero-extend each lane
;; to the next wider width (e.g., 16x4 -> 32x4).
(rule (lower (has_type $I16X8 (sload8x8 flags address offset)))
(x64_pmovsxbw (to_amode flags address offset)))
(rule (lower (has_type $I16X8 (uload8x8 flags address offset)))
(x64_pmovzxbw (to_amode flags address offset)))
(rule (lower (has_type $I32X4 (sload16x4 flags address offset)))
(x64_pmovsxwd (to_amode flags address offset)))
(rule (lower (has_type $I32X4 (uload16x4 flags address offset)))
(x64_pmovzxwd (to_amode flags address offset)))
(rule (lower (has_type $I64X2 (sload32x2 flags address offset)))
(x64_pmovsxdq (to_amode flags address offset)))
(rule (lower (has_type $I64X2 (uload32x2 flags address offset)))
(x64_pmovzxdq (to_amode flags address offset)))
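As a scalar model of what one of these widening loads computes (here `sload8x8`, which lowers to `pmovsxbw` above), a small illustrative sketch rather than backend code:

    // Illustrative model of sload8x8 -> pmovsxbw: read 8 bytes and sign-extend
    // each one into a 16-bit lane; uload8x8 -> pmovzxbw would take u8 instead.
    fn sload8x8(lanes: [i8; 8]) -> [i16; 8] {
        lanes.map(|b| b as i16)
    }
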
;; TODO: Multi-register loads (I128)


@@ -2192,18 +2192,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => unimplemented!(),
};
let ext_mode = ExtMode::new(elem_ty.bits(), 64);
let sign_extend = match op {
Opcode::Sload8
| Opcode::Sload16
| Opcode::Sload32
| Opcode::Sload8x8
| Opcode::Sload16x4
| Opcode::Sload32x2 => true,
_ => false,
};
let amode = match op {
Opcode::Load
| Opcode::Uload8
@@ -2229,60 +2217,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::mov64_m_r(amode.clone(), dsts.regs()[0]));
ctx.emit(Inst::mov64_m_r(amode.offset(8), dsts.regs()[1]));
} else {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let is_xmm = elem_ty.is_float() || elem_ty.is_vector();
match (sign_extend, is_xmm) {
(true, false) => {
// The load is sign-extended only when the output size is lower than 64 bits,
// so ext-mode is defined in this case.
ctx.emit(Inst::movsx_rm_r(ext_mode.unwrap(), RegMem::mem(amode), dst));
}
(false, false) => {
if elem_ty.bytes() == 8 {
// Use a plain load.
ctx.emit(Inst::mov64_m_r(amode, dst))
} else {
// Use a zero-extended load.
ctx.emit(Inst::movzx_rm_r(ext_mode.unwrap(), RegMem::mem(amode), dst))
}
}
(_, true) => {
ctx.emit(match elem_ty {
types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst),
types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst),
types::I8X8 => {
if sign_extend == true {
Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::mem(amode), dst)
} else {
Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::mem(amode), dst)
}
}
types::I16X4 => {
if sign_extend == true {
Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::mem(amode), dst)
} else {
Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::mem(amode), dst)
}
}
types::I32X2 => {
if sign_extend == true {
Inst::xmm_mov(SseOpcode::Pmovsxdq, RegMem::mem(amode), dst)
} else {
Inst::xmm_mov(SseOpcode::Pmovzxdq, RegMem::mem(amode), dst)
}
}
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst)
}
// TODO Specialize for different types: MOVUPD, MOVDQU
_ => unreachable!(
"unexpected type for load: {:?} - {:?}",
elem_ty,
elem_ty.bits()
),
});
}
}
implemented_in_isle(ctx);
}
}


@@ -12,7 +12,7 @@ use crate::{
condcodes::{FloatCC, IntCC},
immediates::*,
types::*,
Inst, InstructionData, Opcode, TrapCode, Value, ValueLabel, ValueList,
Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueLabel, ValueList,
},
isa::{
settings::Flags,
@@ -313,11 +313,30 @@ where
Amode::imm_reg_reg_shift(simm32, base, index, shift)
}
#[inline]
fn amode_imm_reg(&mut self, simm32: u32, base: Gpr) -> Amode {
Amode::imm_reg(simm32, base.to_reg())
}
#[inline]
fn amode_with_flags(&mut self, amode: &Amode, flags: MemFlags) -> Amode {
amode.with_flags(flags)
}
#[inline]
fn amode_to_synthetic_amode(&mut self, amode: &Amode) -> SyntheticAmode {
amode.clone().into()
}
#[inline]
fn const_shift_lt_eq_3(&mut self, shift_amount: Value) -> Option<u8> {
let input = self.lower_ctx.get_value_as_source_or_const(shift_amount);
match input.constant {
Some(shift_amount) if shift_amount <= 3 => Some(shift_amount as u8),
_ => None,
}
}
#[inline]
fn writable_gpr_to_reg(&mut self, r: WritableGpr) -> WritableReg {
r.to_writable_reg()
@@ -519,6 +538,28 @@ where
fn intcc_to_cc(&mut self, intcc: &IntCC) -> CC {
CC::from_intcc(*intcc)
}
#[inline]
fn sum_extend_fits_in_32_bits(
&mut self,
offset: Offset32,
extend_from_ty: Type,
constant_value: Imm64,
) -> Option<u32> {
let offset: i64 = offset.into();
let constant_value: u64 = constant_value.bits() as u64;
// If necessary, zero extend `constant_value` up to 64 bits.
let shift = 64 - extend_from_ty.bits();
let zero_extended_constant_value = (constant_value << shift) >> shift;
// Sum up the two operands.
let sum = offset.wrapping_add(zero_extended_constant_value as i64);
// Check that the sum will fit in 32-bits.
if sum == ((sum << 32) >> 32) {
Some(sum as u32)
} else {
None
}
}
}
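To make the arithmetic in `sum_extend_fits_in_32_bits` concrete, here is a standalone restatement of the same check with a couple of example inputs, using plain integers instead of `Offset32`/`Imm64` (illustrative only):

    // Zero-extend `constant` from `from_bits` bits, add the offset, and require
    // the result to be a valid sign-extended 32-bit displacement.
    fn sum_fits_in_32_bits(offset: i64, constant: u64, from_bits: u32) -> Option<u32> {
        let shift = 64 - from_bits;
        let zext = (constant << shift) >> shift;
        let sum = offset.wrapping_add(zext as i64);
        if sum == (sum << 32) >> 32 {
            Some(sum as u32)
        } else {
            None
        }
    }

    fn main() {
        // A 16-bit constant 0xFFFF (zero-extends to 0xFFFF) plus offset 16 fits.
        assert_eq!(sum_fits_in_32_bits(16, 0xFFFF, 16), Some(0x1000F));
        // A constant near i32::MAX plus a positive offset no longer fits.
        assert_eq!(sum_fits_in_32_bits(64, i32::MAX as u64, 64), None);
    }
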
// Since x64 doesn't have 8x16 shifts and we must use a 16x8 shift instead, we


@@ -1,4 +1,4 @@
src/clif.isle 443b34b797fc8ace
src/prelude.isle 74d9514ac948e163
src/isa/x64/inst.isle a002d62dcfce285
src/isa/x64/lower.isle 8f3e1ed2929fd07e
src/prelude.isle c0751050a11e2686
src/isa/x64/inst.isle c4729db7808ba0b5
src/isa/x64/lower.isle 7e839e6b667bfe77

File diff suppressed because it is too large


@@ -245,6 +245,14 @@ macro_rules! isle_prelude_methods {
}
}
#[inline]
fn ty_int_bool_ref_64(&mut self, ty: Type) -> Option<Type> {
match ty {
I64 | B64 | R64 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int_bool_128(&mut self, ty: Type) -> Option<Type> {
match ty {
@@ -441,6 +449,12 @@ macro_rules! isle_prelude_methods {
fn lane_type(&mut self, ty: Type) -> Type {
ty.lane_type()
}
#[inline]
fn offset32_to_u32(&mut self, offset: Offset32) -> u32 {
let offset: i32 = offset.into();
offset as u32
}
};
}


@@ -265,6 +265,10 @@
(decl ty_int_bool_64 (Type) Type)
(extern extractor ty_int_bool_64 ty_int_bool_64)
;; An extractor that matches I64 or B64 or R64.
(decl ty_int_bool_ref_64 (Type) Type)
(extern extractor ty_int_bool_ref_64 ty_int_bool_ref_64)
;; An extractor that matches I128 or B128.
(decl ty_int_bool_128 (Type) Type)
(extern extractor ty_int_bool_128 ty_int_bool_128)
@@ -379,6 +383,10 @@
(extractor (u64_from_iconst x)
(def_inst (iconst (u64_from_imm64 x))))
;; Convert an `Offset32` to a primitive number.
(decl offset32_to_u32 (Offset32) u32)
(extern constructor offset32_to_u32 offset32_to_u32)
;; Instruction creation helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emit an instruction.
@@ -564,3 +572,4 @@
(convert ValueRegs InstOutput output)
(convert Reg InstOutput output_reg)
(convert Value InstOutput output_value)
(convert Offset32 u32 offset32_to_u32)