Merge pull request #3690 from fitzgen/a-bunch-more-isle

cranelift: Port a bunch more lowerings to ISLE on x64
Nick Fitzgerald, 2022-01-13 18:08:31 -08:00, committed by GitHub
12 changed files with 846 additions and 910 deletions


@@ -1,4 +1,4 @@
 src/clif.isle f176ef3bba99365
-src/prelude.isle 7b911d3b894ae17
+src/prelude.isle 22dd5ff133398960
 src/isa/aarch64/inst.isle 5fa80451697b084f
 src/isa/aarch64/lower.isle 2d2e1e076a0c8a23


@@ -24,6 +24,7 @@ pub trait Context {
     fn u8_and(&mut self, arg0: u8, arg1: u8) -> u8;
     fn value_reg(&mut self, arg0: Reg) -> ValueRegs;
     fn value_regs(&mut self, arg0: Reg, arg1: Reg) -> ValueRegs;
+    fn value_regs_invalid(&mut self) -> ValueRegs;
     fn temp_writable_reg(&mut self, arg0: Type) -> WritableReg;
     fn invalid_reg(&mut self) -> Reg;
     fn put_in_reg(&mut self, arg0: Value) -> Reg;
@@ -92,13 +93,19 @@ pub trait Context {
     fn rotr_opposite_amount(&mut self, arg0: Type, arg1: ImmShift) -> ImmShift;
 }
-/// Internal type ProducesFlags: defined at src/prelude.isle line 277.
+/// Internal type SideEffectNoResult: defined at src/prelude.isle line 279.
+#[derive(Clone, Debug)]
+pub enum SideEffectNoResult {
+    Inst { inst: MInst },
+}
+
+/// Internal type ProducesFlags: defined at src/prelude.isle line 292.
 #[derive(Clone, Debug)]
 pub enum ProducesFlags {
     ProducesFlags { inst: MInst, result: Reg },
 }
-/// Internal type ConsumesFlags: defined at src/prelude.isle line 280.
+/// Internal type ConsumesFlags: defined at src/prelude.isle line 295.
 #[derive(Clone, Debug)]
 pub enum ConsumesFlags {
     ConsumesFlags { inst: MInst, result: Reg },
@@ -978,7 +985,7 @@ pub enum AtomicRMWOp {
 // Generated as internal constructor for term temp_reg.
 pub fn constructor_temp_reg<C: Context>(ctx: &mut C, arg0: Type) -> Option<Reg> {
     let pattern0_0 = arg0;
-    // Rule at src/prelude.isle line 66.
+    // Rule at src/prelude.isle line 70.
     let expr0_0 = C::temp_writable_reg(ctx, pattern0_0);
     let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0);
     return Some(expr1_0);
@@ -987,13 +994,31 @@ pub fn constructor_temp_reg<C: Context>(ctx: &mut C, arg0: Type) -> Option<Reg>
 // Generated as internal constructor for term lo_reg.
 pub fn constructor_lo_reg<C: Context>(ctx: &mut C, arg0: Value) -> Option<Reg> {
     let pattern0_0 = arg0;
-    // Rule at src/prelude.isle line 101.
+    // Rule at src/prelude.isle line 105.
     let expr0_0 = C::put_in_regs(ctx, pattern0_0);
     let expr1_0: usize = 0;
     let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);
     return Some(expr2_0);
 }
+
+// Generated as internal constructor for term value_regs_none.
+pub fn constructor_value_regs_none<C: Context>(
+    ctx: &mut C,
+    arg0: &SideEffectNoResult,
+) -> Option<ValueRegs> {
+    let pattern0_0 = arg0;
+    if let &SideEffectNoResult::Inst {
+        inst: ref pattern1_0,
+    } = pattern0_0
+    {
+        // Rule at src/prelude.isle line 284.
+        let expr0_0 = C::emit(ctx, &pattern1_0);
+        let expr1_0 = C::value_regs_invalid(ctx);
+        return Some(expr1_0);
+    }
+    return None;
+}
 // Generated as internal constructor for term with_flags.
 pub fn constructor_with_flags<C: Context>(
     ctx: &mut C,
@@ -1012,7 +1037,7 @@ pub fn constructor_with_flags<C: Context>(
         result: pattern3_1,
     } = pattern2_0
     {
-        // Rule at src/prelude.isle line 290.
+        // Rule at src/prelude.isle line 305.
         let expr0_0 = C::emit(ctx, &pattern1_0);
         let expr1_0 = C::emit(ctx, &pattern3_0);
         let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1040,7 +1065,7 @@ pub fn constructor_with_flags_1<C: Context>(
         result: pattern3_1,
     } = pattern2_0
     {
-        // Rule at src/prelude.isle line 298.
+        // Rule at src/prelude.isle line 313.
         let expr0_0 = C::emit(ctx, &pattern1_0);
         let expr1_0 = C::emit(ctx, &pattern3_0);
         return Some(pattern3_1);
@@ -1074,7 +1099,7 @@ pub fn constructor_with_flags_2<C: Context>(
         result: pattern5_1,
     } = pattern4_0
     {
-        // Rule at src/prelude.isle line 308.
+        // Rule at src/prelude.isle line 323.
         let expr0_0 = C::emit(ctx, &pattern1_0);
         let expr1_0 = C::emit(ctx, &pattern5_0);
         let expr2_0 = C::emit(ctx, &pattern3_0);


@@ -4,6 +4,7 @@
 (type MInst extern
   (enum (Nop (len u8))
+        (Ud2 (trap_code TrapCode))
         (AluRmiR (size OperandSize)
                  (op AluRmiROpcode)
                  (src1 Reg)
@@ -72,9 +73,11 @@
         (Not (size OperandSize)
              (src Reg)
              (dst WritableReg))
-        (LoadEffectiveAddress (addr SyntheticAmode)
-                              (dst WritableReg))
-))
+        (Neg (size OperandSize)
+             (src Reg)
+             (dst WritableReg))
+        (LoadEffectiveAddress (addr SyntheticAmode)
+                              (dst WritableReg))))
 
 (type OperandSize extern
   (enum Size8
@@ -697,8 +700,8 @@
 ;;
 ;; Use `m_` prefix (short for "mach inst") to disambiguate with the ISLE-builtin
 ;; `and` operator.
-(decl m_and (Type Reg RegMemImm) Reg)
-(rule (m_and ty src1 src2)
+(decl x64_and (Type Reg RegMemImm) Reg)
+(rule (x64_and ty src1 src2)
       (alu_rmi_r ty
                  (AluRmiROpcode.And)
                  src1
@@ -724,7 +727,7 @@
 (decl imm (Type u64) Reg)
 
 ;; Integer immediates.
-(rule (imm ty simm64)
+(rule (imm (fits_in_64 ty) simm64)
       (let ((dst WritableReg (temp_writable_reg ty))
             (size OperandSize (operand_size_of_type_32_64 ty))
            (_ Unit (emit (MInst.Imm size simm64 dst))))
@@ -749,7 +752,7 @@
     (writable_reg_to_reg dst)))
 
 ;; Special case for integer zero immediates: turn them into an `xor r, r`.
-(rule (imm ty 0)
+(rule (imm (fits_in_64 ty) 0)
       (let ((wr WritableReg (temp_writable_reg ty))
             (r Reg (writable_reg_to_reg wr))
            (size OperandSize (operand_size_of_type_32_64 ty))
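The zero special case above turns `(imm ty 0)` into `xor r, r` rather than a move of an immediate. A minimal Rust check of the identity behind it (a sketch, not code from this PR):

    // For any value x, x ^ x == 0, so materializing the constant zero never
    // needs an immediate operand; `xor r, r` is the classic x86 zeroing idiom.
    fn main() {
        for x in [0u64, 1, 0xdead_beef, u64::MAX] {
            assert_eq!(x ^ x, 0);
        }
    }
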
@@ -807,10 +810,16 @@
 ;; Helper for creating `rotl` instructions (prefixed with "m_", short for "mach
 ;; inst", to disambiguate this from clif's `rotl`).
-(decl m_rotl (Type Reg Imm8Reg) Reg)
-(rule (m_rotl ty src1 src2)
+(decl x64_rotl (Type Reg Imm8Reg) Reg)
+(rule (x64_rotl ty src1 src2)
       (shift_r ty (ShiftKind.RotateLeft) src1 src2))
 
+;; Helper for creating `rotr` instructions (prefixed with "m_", short for "mach
+;; inst", to disambiguate this from clif's `rotr`).
+(decl x64_rotr (Type Reg Imm8Reg) Reg)
+(rule (x64_rotr ty src1 src2)
+      (shift_r ty (ShiftKind.RotateRight) src1 src2))
+
 ;; Helper for creating `shl` instructions.
 (decl shl (Type Reg Imm8Reg) Reg)
 (rule (shl ty src1 src2)
@@ -1423,8 +1432,21 @@
         (_ Unit (emit (MInst.Not size src dst))))
     (writable_reg_to_reg dst)))
 
+;; Helper for creating `neg` instructions.
+(decl neg (Type Reg) Reg)
+(rule (neg ty src)
+      (let ((dst WritableReg (temp_writable_reg ty))
+            (size OperandSize (operand_size_of_type_32_64 ty))
+            (_ Unit (emit (MInst.Neg size src dst))))
+        (writable_reg_to_reg dst)))
+
 (decl lea (SyntheticAmode) Reg)
 (rule (lea addr)
       (let ((dst WritableReg (temp_writable_reg $I64))
             (_ Unit (emit (MInst.LoadEffectiveAddress addr dst))))
         (writable_reg_to_reg dst)))
+
+;; Helper for creating `ud2` instructions.
+(decl ud2 (TrapCode) SideEffectNoResult)
+(rule (ud2 code)
+      (SideEffectNoResult.Inst (MInst.Ud2 code)))
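The new `neg` helper above wraps the x86 `neg` instruction, i.e. two's-complement negation. A quick Rust check of the identities it relies on (a sketch, not code from this PR):

    // Two's-complement negation: -x == 0 - x == !x + 1, all with wrapping.
    fn main() {
        for x in [0u32, 1, 42, 0x8000_0000, u32::MAX] {
            assert_eq!(x.wrapping_neg(), 0u32.wrapping_sub(x));
            assert_eq!(x.wrapping_neg(), (!x).wrapping_add(1));
        }
    }
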


@@ -16,6 +16,17 @@ use super::*;
 use crate::isa::x64;
 use alloc::vec::Vec;
 
+impl Inst {
+    fn neg(size: OperandSize, src: Writable<Reg>) -> Inst {
+        debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
+        Inst::Neg {
+            size,
+            src: src.to_reg(),
+            dst: src,
+        }
+    }
+}
+
 #[test]
 fn test_x64_emit() {
     let rax = regs::rax();


@@ -668,15 +668,6 @@ impl Inst {
         }
     }
 
-    pub(crate) fn neg(size: OperandSize, src: Writable<Reg>) -> Inst {
-        debug_assert_eq!(src.to_reg().get_class(), RegClass::I64);
-        Inst::Neg {
-            size,
-            src: src.to_reg(),
-            dst: src,
-        }
-    }
-
     pub(crate) fn div(size: OperandSize, signed: bool, divisor: RegMem) -> Inst {
         divisor.assert_regclass_is(RegClass::I64);
         Inst::Div {


@@ -326,7 +326,7 @@
 ;; And two registers.
 (rule (lower (has_type (fits_in_64 ty) (band x y)))
-      (value_reg (m_and ty
+      (value_reg (x64_and ty
                  (put_in_reg x)
                  (RegMemImm.Reg (put_in_reg y)))))
@@ -334,13 +334,13 @@
 (rule (lower (has_type (fits_in_64 ty)
                        (band x (sinkable_load y))))
-      (value_reg (m_and ty
+      (value_reg (x64_and ty
                  (put_in_reg x)
                  (sink_load y))))
 
 (rule (lower (has_type (fits_in_64 ty)
                        (band (sinkable_load x) y)))
-      (value_reg (m_and ty
+      (value_reg (x64_and ty
                  (put_in_reg y)
                  (sink_load x))))
@@ -348,13 +348,13 @@
 (rule (lower (has_type (fits_in_64 ty)
                        (band x (simm32_from_value y))))
-      (value_reg (m_and ty
+      (value_reg (x64_and ty
                  (put_in_reg x)
                  y)))
 
 (rule (lower (has_type (fits_in_64 ty)
                        (band (simm32_from_value x) y)))
-      (value_reg (m_and ty
+      (value_reg (x64_and ty
                  (put_in_reg y)
                  x)))
@@ -378,8 +378,8 @@
         (y_regs ValueRegs (put_in_regs y))
         (y_lo Reg (value_regs_get y_regs 0))
         (y_hi Reg (value_regs_get y_regs 1)))
-    (value_regs (m_and $I64 x_lo (RegMemImm.Reg y_lo))
-                (m_and $I64 x_hi (RegMemImm.Reg y_hi)))))
+    (value_regs (x64_and $I64 x_lo (RegMemImm.Reg y_lo))
+                (x64_and $I64 x_hi (RegMemImm.Reg y_hi)))))
 
 (rule (lower (has_type $B128 (band x y)))
       ;; Booleans are always `0` or `1`, so we only need to do the `and` on the
@@ -389,7 +389,7 @@
         (x_lo Reg (value_regs_get x_regs 0))
         (x_hi Reg (value_regs_get x_regs 1))
         (y_lo Reg (lo_reg y)))
-    (value_regs (m_and $I64 x_lo (RegMemImm.Reg y_lo))
+    (value_regs (x64_and $I64 x_lo (RegMemImm.Reg y_lo))
                 x_hi)))
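As the comment in this rule notes, booleans here are only ever `0` or `1`, so the upper 64 bits of a `$B128` value are always zero: only the low halves need an actual `and`, and the high half can be passed through. A small Rust model of that reasoning (a sketch, not code from this PR):

    // Model a b128 as (lo, hi) halves where the whole value is 0 or 1, so hi == 0.
    // Then the full 128-bit `and` agrees with and-ing only the low halves.
    fn band128(x: (u64, u64), y: (u64, u64)) -> (u64, u64) {
        (x.0 & y.0, x.1) // hi half passed through unchanged
    }

    fn main() {
        let t = (1u64, 0u64); // true
        let f = (0u64, 0u64); // false
        for &x in &[t, f] {
            for &y in &[t, f] {
                let full = (x.0 & y.0, x.1 & y.1);
                assert_eq!(band128(x, y), full);
            }
        }
    }
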
 ;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -832,11 +832,11 @@
 (rule (lower (has_type (ty_8_or_16 ty) (rotl src amt)))
       (let ((amt_ Reg (extend_to_reg amt $I32 (ExtendKind.Zero))))
-        (value_reg (m_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
+        (value_reg (x64_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
 
 (rule (lower (has_type (ty_8_or_16 ty)
                        (rotl src (u64_from_iconst amt))))
-      (value_reg (m_rotl ty
+      (value_reg (x64_rotl ty
                  (put_in_reg src)
                  (const_to_type_masked_imm8 amt ty))))
@@ -847,11 +847,11 @@
       ;; NB: Only the low bits of `amt` matter since we logically mask the
       ;; shift amount to the value's bit width.
       (let ((amt_ Reg (lo_reg amt)))
-        (value_reg (m_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
+        (value_reg (x64_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
 
 (rule (lower (has_type (ty_32_or_64 ty)
                        (rotl src (u64_from_iconst amt))))
-      (value_reg (m_rotl ty
+      (value_reg (x64_rotl ty
                  (put_in_reg src)
                  (const_to_type_masked_imm8 amt ty))))
@@ -865,6 +865,71 @@
     (or_i128 (shl_i128 src_ amt_)
              (shr_i128 src_ (sub $I64 (imm $I64 128) (RegMemImm.Reg amt_))))))
;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i16` and `i8`: we need to extend the shift amount, or mask the
;; constant.
(rule (lower (has_type (ty_8_or_16 ty) (rotr src amt)))
(let ((amt_ Reg (extend_to_reg amt $I32 (ExtendKind.Zero))))
(value_reg (x64_rotr ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
(rule (lower (has_type (ty_8_or_16 ty)
(rotr src (u64_from_iconst amt))))
(value_reg (x64_rotr ty
(put_in_reg src)
(const_to_type_masked_imm8 amt ty))))
;; `i64` and `i32`: we can rely on x86's rotate-amount masking since
;; we operate on the whole register.
(rule (lower (has_type (ty_32_or_64 ty) (rotr src amt)))
;; NB: Only the low bits of `amt` matter since we logically mask the
;; shift amount to the value's bit width.
(let ((amt_ Reg (lo_reg amt)))
(value_reg (x64_rotr ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
(rule (lower (has_type (ty_32_or_64 ty)
(rotr src (u64_from_iconst amt))))
(value_reg (x64_rotr ty
(put_in_reg src)
(const_to_type_masked_imm8 amt ty))))
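Both groups of `rotr` rules above depend on the rotation amount only mattering modulo the type's bit width: for `i32`/`i64` the hardware masks the count in `cl` itself, while for `i8`/`i16` the constant is masked via `const_to_type_masked_imm8` and the register amount is zero-extended. A small Rust check of that modular behaviour (a sketch, not code from this PR):

    // Rotation amounts only matter modulo the bit width of the value.
    fn main() {
        let x: u32 = 0x8000_0001;
        assert_eq!(x.rotate_right(35), x.rotate_right(3)); // 35 % 32 == 3
        let b: u8 = 0b1001_0110;
        for amt in 0..64u32 {
            assert_eq!(b.rotate_right(amt), b.rotate_right(amt & 7));
        }
    }
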
;; `i128`.
(rule (lower (has_type $I128 (rotr src amt)))
(let ((src_ ValueRegs (put_in_regs src))
;; NB: Only the low bits of `amt` matter since we logically mask the
;; rotation amount to the value's bit width.
(amt_ Reg (lo_reg amt)))
(or_i128 (shr_i128 src_ amt_)
(shl_i128 src_ (sub $I64 (imm $I64 128) (RegMemImm.Reg amt_))))))
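The `$I128` rule expresses the rotate as a right shift OR'd with a left shift by `128 - amt`; the `amt == 0` edge case is absorbed by the `test`/`cmov` sequences inside the shift helpers. A quick Rust check of the identity itself (a sketch, not code from this PR):

    // rotr(x, n) == (x >> n) | (x << (128 - n)) for 0 < n < 128; n == 0 is the
    // edge case the emitted cmov sequence takes care of.
    fn rotr128(x: u128, amt: u32) -> u128 {
        let n = amt & 127;
        if n == 0 { x } else { (x >> n) | (x << (128 - n)) }
    }

    fn main() {
        let x: u128 = 0x0123_4567_89ab_cdef_fedc_ba98_7654_3210;
        for amt in 0..256u32 {
            assert_eq!(rotr128(x, amt), x.rotate_right(amt));
        }
    }
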
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.
(rule (lower (has_type (fits_in_64 ty) (ineg x)))
(value_reg (neg ty (put_in_reg x))))
;; SSE.
(rule (lower (has_type $I8X16 (ineg x)))
(value_reg (psubb (imm $I8X16 0)
(put_in_reg_mem x))))
(rule (lower (has_type $I16X8 (ineg x)))
(value_reg (psubw (imm $I16X8 0)
(put_in_reg_mem x))))
(rule (lower (has_type $I32X4 (ineg x)))
(value_reg (psubd (imm $I32X4 0)
(put_in_reg_mem x))))
(rule (lower (has_type $I64X2 (ineg x)))
(value_reg (psubq (imm $I64X2 0)
(put_in_reg_mem x))))
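SSE has no packed integer negate, so the vector rules above materialize a zero vector and use a packed subtract; per lane, `0 - x` is exactly `-x` under wrapping arithmetic. A lane-wise model in plain Rust (a sketch, not code from this PR):

    // Lane-wise model of psub-from-zero: 0 - x == -x in two's complement.
    fn main() {
        for x in [0i8, 1, -1, 42, -42, 127, -128] {
            assert_eq!(0i8.wrapping_sub(x), x.wrapping_neg());
        }
    }
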
 ;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type (multi_lane 8 16)
@@ -1367,3 +1432,13 @@
 (rule (lower (has_type $I32X4 (umin x y)))
       (value_reg (pminud (put_in_reg x) (put_in_reg_mem y))))
;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (trap code))
(value_regs_none (ud2 code)))
;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (resumable_trap code))
(value_regs_none (ud2 code)))


@@ -877,316 +877,6 @@ fn emit_bitrev<C: LowerCtx<I = Inst>>(ctx: &mut C, src: Reg, dst: Writable<Reg>,
     ctx.emit(Inst::gen_move(dst, tmp0.to_reg(), types::I64));
 }
fn emit_shl_i128<C: LowerCtx<I = Inst>>(
ctx: &mut C,
src: ValueRegs<Reg>,
dst: ValueRegs<Writable<Reg>>,
amt_src: Reg,
) {
let src_lo = src.regs()[0];
let src_hi = src.regs()[1];
let dst_lo = dst.regs()[0];
let dst_hi = dst.regs()[1];
// mov tmp1, src_lo
// shl tmp1, amt_src
// mov tmp2, src_hi
// shl tmp2, amt_src
// mov amt, 64
// sub amt, amt_src
// mov tmp3, src_lo
// shr tmp3, amt
// xor dst_lo, dst_lo
// test amt_src, 127
// cmovz tmp3, dst_lo
// or tmp3, tmp2
// mov amt, amt_src
// and amt, 64
// cmovz dst_hi, tmp3
// cmovz dst_lo, tmp1
// cmovnz dst_hi, tmp1
let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
let tmp3 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
let amt = ctx.alloc_tmp(types::I64).only_reg().unwrap();
ctx.emit(Inst::gen_move(tmp1, src_lo, types::I64));
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rcx()),
amt_src,
types::I64,
));
ctx.emit(Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftLeft,
None,
tmp1,
));
ctx.emit(Inst::gen_move(tmp2, src_hi, types::I64));
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rcx()),
amt_src,
types::I64,
));
ctx.emit(Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftLeft,
None,
tmp2,
));
ctx.emit(Inst::imm(OperandSize::Size64, 64, amt));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Sub,
RegMemImm::reg(amt_src),
amt,
));
ctx.emit(Inst::gen_move(tmp3, src_lo, types::I64));
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rcx()),
amt.to_reg(),
types::I64,
));
ctx.emit(Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
None,
tmp3,
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Xor,
RegMemImm::reg(dst_lo.to_reg()),
dst_lo,
));
ctx.emit(Inst::test_rmi_r(
OperandSize::Size64,
RegMemImm::imm(127),
amt_src,
));
ctx.emit(Inst::cmove(
OperandSize::Size64,
CC::Z,
RegMem::reg(dst_lo.to_reg()),
tmp3,
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Or,
RegMemImm::reg(tmp2.to_reg()),
tmp3,
));
// This isn't semantically necessary, but it keeps the
// register allocator happy, because it cannot otherwise
// infer that cmovz + cmovnz always defines dst_hi.
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Xor,
RegMemImm::reg(dst_hi.to_reg()),
dst_hi,
));
ctx.emit(Inst::gen_move(amt, amt_src, types::I64));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::And,
RegMemImm::imm(64),
amt,
));
ctx.emit(Inst::cmove(
OperandSize::Size64,
CC::Z,
RegMem::reg(tmp3.to_reg()),
dst_hi,
));
ctx.emit(Inst::cmove(
OperandSize::Size64,
CC::Z,
RegMem::reg(tmp1.to_reg()),
dst_lo,
));
ctx.emit(Inst::cmove(
OperandSize::Size64,
CC::NZ,
RegMem::reg(tmp1.to_reg()),
dst_hi,
));
}
fn emit_shr_i128<C: LowerCtx<I = Inst>>(
ctx: &mut C,
src: ValueRegs<Reg>,
dst: ValueRegs<Writable<Reg>>,
amt_src: Reg,
is_signed: bool,
) {
let src_lo = src.regs()[0];
let src_hi = src.regs()[1];
let dst_lo = dst.regs()[0];
let dst_hi = dst.regs()[1];
// mov tmp1, src_hi
// {u,s}shr tmp1, amt_src
// mov tmp2, src_lo
// ushr tmp2, amt_src
// mov amt, 64
// sub amt, amt_src
// mov tmp3, src_hi
// shl tmp3, amt
// xor dst_lo, dst_lo
// test amt_src, 127
// cmovz tmp3, dst_lo
// or tmp3, tmp2
// if is_signed:
// mov dst_hi, src_hi
// sshr dst_hi, 63 // get the sign bit
// else:
// xor dst_hi, dst_hi
// mov amt, amt_src
// and amt, 64
// cmovz dst_hi, tmp1
// cmovz dst_lo, tmp3
// cmovnz dst_lo, tmp1
let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
let tmp3 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
let amt = ctx.alloc_tmp(types::I64).only_reg().unwrap();
let shift_kind = if is_signed {
ShiftKind::ShiftRightArithmetic
} else {
ShiftKind::ShiftRightLogical
};
ctx.emit(Inst::gen_move(tmp1, src_hi, types::I64));
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rcx()),
amt_src,
types::I64,
));
ctx.emit(Inst::shift_r(OperandSize::Size64, shift_kind, None, tmp1));
ctx.emit(Inst::gen_move(tmp2, src_lo, types::I64));
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rcx()),
amt_src,
types::I64,
));
// N.B.: right-shift of *lower* half is *always* unsigned (its MSB is not a sign bit).
ctx.emit(Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightLogical,
None,
tmp2,
));
ctx.emit(Inst::imm(OperandSize::Size64, 64, amt));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Sub,
RegMemImm::reg(amt_src),
amt,
));
ctx.emit(Inst::gen_move(tmp3, src_hi, types::I64));
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::rcx()),
amt.to_reg(),
types::I64,
));
ctx.emit(Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftLeft,
None,
tmp3,
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Xor,
RegMemImm::reg(dst_lo.to_reg()),
dst_lo,
));
ctx.emit(Inst::test_rmi_r(
OperandSize::Size64,
RegMemImm::imm(127),
amt_src,
));
ctx.emit(Inst::cmove(
OperandSize::Size64,
CC::Z,
RegMem::reg(dst_lo.to_reg()),
tmp3,
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Or,
RegMemImm::reg(tmp2.to_reg()),
tmp3,
));
if is_signed {
ctx.emit(Inst::gen_move(dst_hi, src_hi, types::I64));
ctx.emit(Inst::shift_r(
OperandSize::Size64,
ShiftKind::ShiftRightArithmetic,
Some(63),
dst_hi,
));
} else {
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Xor,
RegMemImm::reg(dst_hi.to_reg()),
dst_hi,
));
}
// This isn't semantically necessary, but it keeps the
// register allocator happy, because it cannot otherwise
// infer that cmovz + cmovnz always defines dst_lo.
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Xor,
RegMemImm::reg(dst_lo.to_reg()),
dst_lo,
));
ctx.emit(Inst::gen_move(amt, amt_src, types::I64));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::And,
RegMemImm::imm(64),
amt,
));
ctx.emit(Inst::cmove(
OperandSize::Size64,
CC::Z,
RegMem::reg(tmp1.to_reg()),
dst_hi,
));
ctx.emit(Inst::cmove(
OperandSize::Size64,
CC::Z,
RegMem::reg(tmp3.to_reg()),
dst_lo,
));
ctx.emit(Inst::cmove(
OperandSize::Size64,
CC::NZ,
RegMem::reg(tmp1.to_reg()),
dst_lo,
));
}
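The removed `emit_shl_i128`/`emit_shr_i128` helpers (like their ISLE replacements `shl_i128`/`shr_i128`) build a 128-bit shift out of 64-bit shifts on the two halves plus `test`/`cmov` fix-ups for amounts of zero or 64 and above. A compact Rust model of the same decomposition, with hypothetical helper names (a sketch, not code from this PR):

    // Model a u128 as (lo, hi) u64 halves and shift it by amt (mod 128),
    // mirroring what the emitted shl/shr/or + cmov sequences compute.
    fn shl128(lo: u64, hi: u64, amt: u32) -> (u64, u64) {
        match amt & 127 {
            0 => (lo, hi),
            n @ 1..=63 => (lo << n, (hi << n) | (lo >> (64 - n))),
            n => (0, lo << (n - 64)),
        }
    }

    fn shr128(lo: u64, hi: u64, amt: u32) -> (u64, u64) {
        match amt & 127 {
            0 => (lo, hi),
            n @ 1..=63 => ((lo >> n) | (hi << (64 - n)), hi >> n),
            n => (hi >> (n - 64), 0),
        }
    }

    fn main() {
        let x: u128 = 0x0123_4567_89ab_cdef_0011_2233_4455_6677;
        let (lo, hi) = (x as u64, (x >> 64) as u64);
        for amt in 0..128u32 {
            let (slo, shi) = shl128(lo, hi, amt);
            assert_eq!(((shi as u128) << 64) | slo as u128, x << amt);
            let (slo, shi) = shr128(lo, hi, amt);
            assert_eq!(((shi as u128) << 64) | slo as u128, x >> amt);
        }
    }
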
 fn make_libcall_sig<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     insn: IRInst,
@@ -1541,142 +1231,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::Vselect
         | Opcode::Ushr
         | Opcode::Sshr
-        | Opcode::Ishl => implemented_in_isle(ctx),
+        | Opcode::Ishl
+        | Opcode::Rotl
+        | Opcode::Rotr
+        | Opcode::Ineg
+        | Opcode::Trap
+        | Opcode::ResumableTrap => implemented_in_isle(ctx),

Opcode::Rotl | Opcode::Rotr => {
let dst_ty = ctx.output_ty(insn, 0);
debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty);
if !dst_ty.is_vector() && dst_ty.bits() <= 64 {
if op != Opcode::Rotr {
implemented_in_isle(ctx);
}
// Scalar shifts on x86 have various encodings:
// - shift by one bit, e.g. `SAL r/m8, 1` (not used here)
// - shift by an immediate amount, e.g. `SAL r/m8, imm8`
// - shift by a dynamic amount but only from the CL register, e.g. `SAL r/m8, CL`.
// This implementation uses the last two encoding methods.
let (size, lhs) = match dst_ty {
types::I8 | types::I16 => match op {
Opcode::Rotr => (
OperandSize::from_ty(dst_ty),
put_input_in_reg(ctx, inputs[0]),
),
_ => unreachable!(),
},
types::I32 | types::I64 => (
OperandSize::from_ty(dst_ty),
put_input_in_reg(ctx, inputs[0]),
),
_ => unreachable!("unhandled output type for shift/rotates: {}", dst_ty),
};
let (count, rhs) =
if let Some(cst) = ctx.get_input_as_source_or_const(insn, 1).constant {
// Mask count, according to Cranelift's semantics.
let cst = (cst as u8) & (dst_ty.bits() as u8 - 1);
(Some(cst), None)
} else {
// We can ignore upper registers if shift amount is multi-reg, because we
// are taking the shift amount mod 2^(lhs_width) anyway.
(None, Some(put_input_in_regs(ctx, inputs[1]).regs()[0]))
};
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let shift_kind = match op {
Opcode::Rotr => ShiftKind::RotateRight,
_ => unreachable!(),
};
let w_rcx = Writable::from_reg(regs::rcx());
ctx.emit(Inst::mov_r_r(OperandSize::Size64, lhs, dst));
if count.is_none() {
ctx.emit(Inst::mov_r_r(OperandSize::Size64, rhs.unwrap(), w_rcx));
}
ctx.emit(Inst::shift_r(size, shift_kind, count, dst));
} else if dst_ty == types::I128 {
let amt_src = put_input_in_regs(ctx, inputs[1]).regs()[0];
let src = put_input_in_regs(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]);
match op {
Opcode::Rotr => {
// (mov tmp, src)
// (ushr.i128 tmp, amt)
// (mov dst, src)
// (shl.i128 dst, 128-amt)
// (or dst, tmp)
let tmp = ctx.alloc_tmp(types::I128);
emit_shr_i128(ctx, src, tmp, amt_src, /* is_signed = */ false);
let inv_amt = ctx.alloc_tmp(types::I64).only_reg().unwrap();
ctx.emit(Inst::imm(OperandSize::Size64, 128, inv_amt));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Sub,
RegMemImm::reg(amt_src),
inv_amt,
));
emit_shl_i128(ctx, src, dst, inv_amt.to_reg());
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Or,
RegMemImm::reg(tmp.regs()[0].to_reg()),
dst.regs()[0],
));
ctx.emit(Inst::alu_rmi_r(
OperandSize::Size64,
AluRmiROpcode::Or,
RegMemImm::reg(tmp.regs()[1].to_reg()),
dst.regs()[1],
));
}
_ => unreachable!(),
}
} else {
implemented_in_isle(ctx);
}
}
Opcode::Ineg => {
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();
if ty.is_vector() {
// Zero's out a register and then does a packed subtraction
// of the input from the register.
let src = input_to_reg_mem(ctx, inputs[0]);
let tmp = ctx.alloc_tmp(types::I32X4).only_reg().unwrap();
let subtract_opcode = match ty {
types::I8X16 => SseOpcode::Psubb,
types::I16X8 => SseOpcode::Psubw,
types::I32X4 => SseOpcode::Psubd,
types::I64X2 => SseOpcode::Psubq,
_ => panic!("Unsupported type for Ineg instruction, found {}", ty),
};
// Note we must zero out a tmp instead of using the destination register since
// the desitnation could be an alias for the source input register
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pxor,
RegMem::reg(tmp.to_reg()),
tmp,
));
ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp));
ctx.emit(Inst::xmm_unary_rm_r(
SseOpcode::Movapd,
RegMem::reg(tmp.to_reg()),
dst,
));
} else {
let src = put_input_in_reg(ctx, inputs[0]);
ctx.emit(Inst::gen_move(dst, src, ty));
ctx.emit(Inst::neg(OperandSize::from_ty(ty), dst));
}
}
         Opcode::Clz => {
             let orig_ty = ty.unwrap();
@@ -2811,11 +2371,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(Inst::Hlt);
         }
 
-        Opcode::Trap | Opcode::ResumableTrap => {
-            let trap_code = ctx.data(insn).trap_code().unwrap();
-            ctx.emit_safepoint(Inst::Ud2 { trap_code });
-        }
-
         Opcode::Trapif | Opcode::Trapff => {
             let trap_code = ctx.data(insn).trap_code().unwrap();


@@ -1,4 +1,4 @@
 src/clif.isle f176ef3bba99365
-src/prelude.isle 7b911d3b894ae17
+src/prelude.isle 22dd5ff133398960
-src/isa/x64/inst.isle 41304d8ef6f7d816
+src/isa/x64/inst.isle 61004acbb1289816
-src/isa/x64/lower.isle 4689585f55f41438
+src/isa/x64/lower.isle 82db7f7d47ac7809

File diff suppressed because it is too large.


@@ -48,6 +48,11 @@ macro_rules! isle_prelude_methods {
             ValueRegs::two(r1, r2)
         }
 
+        #[inline]
+        fn value_regs_invalid(&mut self) -> ValueRegs {
+            ValueRegs::invalid()
+        }
+
         #[inline]
         fn temp_writable_reg(&mut self, ty: Type) -> WritableReg {
             let value_regs = self.lower_ctx.alloc_tmp(ty);


@@ -57,6 +57,10 @@
 (decl value_regs (Reg Reg) ValueRegs)
 (extern constructor value_regs value_regs)
 
+;; Construct an empty `ValueRegs` containing only invalid register sentinels.
+(decl value_regs_invalid () ValueRegs)
+(extern constructor value_regs_invalid value_regs_invalid)
+
 ;; Get a temporary register for writing.
 (decl temp_writable_reg (Type) WritableReg)
 (extern constructor temp_writable_reg temp_writable_reg)
@@ -270,6 +274,17 @@
 (extractor (u64_from_iconst x)
   (def_inst (iconst (u64_from_imm64 x))))
 
+;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;;
+
+(type SideEffectNoResult (enum (Inst (inst MInst))))
+
+;; Create an empty `ValueRegs`, but do emit the given side-effectful
+;; instruction.
+(decl value_regs_none (SideEffectNoResult) ValueRegs)
+(rule (value_regs_none (SideEffectNoResult.Inst inst))
+      (let ((_ Unit (emit inst)))
+        (value_regs_invalid)))
+
 ;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; Newtype wrapper around `MInst` for instructions that are used for their
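The `value_regs_none` helper above captures instructions that run purely for their side effect (such as `ud2` for `trap`): the instruction is emitted, and an invalid `ValueRegs` is returned so the lowering still has something to hand back. A toy Rust mock of that shape; the type and method names here are illustrative, not the real cranelift API (a sketch, not code from this PR):

    // Toy model of the value_regs_none pattern: emit the side-effecting
    // instruction, then return a ValueRegs that holds no real registers.
    enum Inst {
        Ud2 { trap_code: u16 },
    }

    #[derive(Debug, PartialEq)]
    struct ValueRegs(Option<(u32, u32)>);

    impl ValueRegs {
        fn invalid() -> Self {
            ValueRegs(None)
        }
    }

    struct Ctx {
        emitted: Vec<Inst>,
    }

    impl Ctx {
        // Corresponds to (rule (value_regs_none (SideEffectNoResult.Inst inst)) ...).
        fn value_regs_none(&mut self, inst: Inst) -> ValueRegs {
            self.emitted.push(inst); // side effect: the instruction is emitted
            ValueRegs::invalid()     // but no result registers are produced
        }
    }

    fn main() {
        let mut ctx = Ctx { emitted: Vec::new() };
        let regs = ctx.value_regs_none(Inst::Ud2 { trap_code: 0 });
        assert_eq!(regs, ValueRegs::invalid());
        assert_eq!(ctx.emitted.len(), 1);
    }
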


@@ -1270,56 +1270,52 @@ block0(v0: i128, v1: i128):
 ; Entry block: 0
 ; Block 0:
 ; (original IR block: block0)
-; (instruction range: 0 .. 50)
+; (instruction range: 0 .. 46)
 ; Inst 0: pushq %rbp
 ; Inst 1: movq %rsp, %rbp
-; Inst 2: movq %rsi, %rax
-; Inst 3: movq %rax, %r9
-; Inst 4: movq %rdx, %rcx
-; Inst 5: shrq %cl, %r9
-; Inst 6: movq %rdi, %rsi
-; Inst 7: movq %rdx, %rcx
-; Inst 8: shrq %cl, %rsi
-; Inst 9: movl $64, %ecx
-; Inst 10: subq %rdx, %rcx
-; Inst 11: movq %rax, %r10
-; Inst 12: shlq %cl, %r10
-; Inst 13: xorq %rcx, %rcx
-; Inst 14: testq $127, %rdx
-; Inst 15: cmovzq %rcx, %r10
-; Inst 16: orq %rsi, %r10
-; Inst 17: xorq %rsi, %rsi
-; Inst 18: xorq %r8, %r8
-; Inst 19: movq %rdx, %rcx
-; Inst 20: andq $64, %rcx
-; Inst 21: cmovzq %r9, %rsi
-; Inst 22: cmovzq %r10, %r8
-; Inst 23: cmovnzq %r9, %r8
-; Inst 24: movl $128, %r9d
-; Inst 25: subq %rdx, %r9
-; Inst 26: movq %rdi, %rdx
-; Inst 27: movq %r9, %rcx
-; Inst 28: shlq %cl, %rdx
-; Inst 29: movq %r9, %rcx
-; Inst 30: shlq %cl, %rax
-; Inst 31: movl $64, %ecx
-; Inst 32: subq %r9, %rcx
-; Inst 33: shrq %cl, %rdi
-; Inst 34: xorq %rcx, %rcx
-; Inst 35: testq $127, %r9
-; Inst 36: cmovzq %rcx, %rdi
-; Inst 37: orq %rax, %rdi
-; Inst 38: xorq %rax, %rax
-; Inst 39: andq $64, %r9
-; Inst 40: cmovzq %rdi, %rax
-; Inst 41: cmovzq %rdx, %rcx
-; Inst 42: cmovnzq %rdx, %rax
-; Inst 43: orq %r8, %rcx
-; Inst 44: orq %rsi, %rax
-; Inst 45: movq %rax, %rdx
-; Inst 46: movq %rcx, %rax
-; Inst 47: movq %rbp, %rsp
-; Inst 48: popq %rbp
-; Inst 49: ret
+; Inst 2: movq %rdi, %rax
+; Inst 3: movq %rdx, %rcx
+; Inst 4: shrq %cl, %rax
+; Inst 5: movq %rsi, %r8
+; Inst 6: movq %rdx, %rcx
+; Inst 7: shrq %cl, %r8
+; Inst 8: movl $64, %ecx
+; Inst 9: subq %rdx, %rcx
+; Inst 10: movq %rsi, %r9
+; Inst 11: shlq %cl, %r9
+; Inst 12: xorq %rcx, %rcx
+; Inst 13: testq $127, %rdx
+; Inst 14: cmovzq %rcx, %r9
+; Inst 15: movq %r9, %rcx
+; Inst 16: orq %rax, %rcx
+; Inst 17: xorq %rax, %rax
+; Inst 18: testq $64, %rdx
+; Inst 19: cmovzq %r8, %rax
+; Inst 20: cmovzq %rcx, %r8
+; Inst 21: movl $128, %r9d
+; Inst 22: subq %rdx, %r9
+; Inst 23: movq %rdi, %rdx
+; Inst 24: movq %r9, %rcx
+; Inst 25: shlq %cl, %rdx
+; Inst 26: movq %r9, %rcx
+; Inst 27: shlq %cl, %rsi
+; Inst 28: movl $64, %ecx
+; Inst 29: subq %r9, %rcx
+; Inst 30: shrq %cl, %rdi
+; Inst 31: xorq %rcx, %rcx
+; Inst 32: testq $127, %r9
+; Inst 33: cmovzq %rcx, %rdi
+; Inst 34: orq %rsi, %rdi
+; Inst 35: testq $64, %r9
+; Inst 36: movq %rdx, %rsi
+; Inst 37: cmovzq %rdi, %rsi
+; Inst 38: cmovzq %rdx, %rcx
+; Inst 39: orq %rcx, %r8
+; Inst 40: orq %rsi, %rax
+; Inst 41: movq %rax, %rdx
+; Inst 42: movq %r8, %rax
+; Inst 43: movq %rbp, %rsp
+; Inst 44: popq %rbp
+; Inst 45: ret
 ; }}