Migrate clz, ctz, popcnt, bitrev, is_null, is_invalid on x64 to ISLE. (#3848)

This commit is contained in:
Chris Fallin
2022-02-28 09:45:13 -08:00
committed by GitHub
parent 2a6969d2bd
commit 24f145cd1e
19 changed files with 2812 additions and 1990 deletions

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03 src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 9830498351ddf6a3 src/prelude.isle 6b0160bfcac86902
src/isa/aarch64/inst.isle 3678d0a37bdb4cff src/isa/aarch64/inst.isle 3678d0a37bdb4cff
src/isa/aarch64/lower.isle 90accbfcadaea46d src/isa/aarch64/lower.isle 90accbfcadaea46d

View File

@@ -39,8 +39,14 @@ pub trait Context {
fn u8_as_u64(&mut self, arg0: u8) -> u64; fn u8_as_u64(&mut self, arg0: u8) -> u64;
fn u16_as_u64(&mut self, arg0: u16) -> u64; fn u16_as_u64(&mut self, arg0: u16) -> u64;
fn u32_as_u64(&mut self, arg0: u32) -> u64; fn u32_as_u64(&mut self, arg0: u32) -> u64;
fn i64_as_u64(&mut self, arg0: i64) -> u64;
fn u64_add(&mut self, arg0: u64, arg1: u64) -> u64;
fn u64_sub(&mut self, arg0: u64, arg1: u64) -> u64;
fn u64_and(&mut self, arg0: u64, arg1: u64) -> u64;
fn ty_bits(&mut self, arg0: Type) -> u8; fn ty_bits(&mut self, arg0: Type) -> u8;
fn ty_bits_u16(&mut self, arg0: Type) -> u16; fn ty_bits_u16(&mut self, arg0: Type) -> u16;
fn ty_bits_u64(&mut self, arg0: Type) -> u64;
fn ty_mask(&mut self, arg0: Type) -> u64;
fn ty_bytes(&mut self, arg0: Type) -> u16; fn ty_bytes(&mut self, arg0: Type) -> u16;
fn lane_type(&mut self, arg0: Type) -> Type; fn lane_type(&mut self, arg0: Type) -> Type;
fn fits_in_16(&mut self, arg0: Type) -> Option<Type>; fn fits_in_16(&mut self, arg0: Type) -> Option<Type>;
@@ -110,13 +116,13 @@ pub trait Context {
fn rotr_opposite_amount(&mut self, arg0: Type, arg1: ImmShift) -> ImmShift; fn rotr_opposite_amount(&mut self, arg0: Type, arg1: ImmShift) -> ImmShift;
} }
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 363. /// Internal type SideEffectNoResult: defined at src/prelude.isle line 385.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum SideEffectNoResult { pub enum SideEffectNoResult {
Inst { inst: MInst }, Inst { inst: MInst },
} }
/// Internal type ProducesFlags: defined at src/prelude.isle line 385. /// Internal type ProducesFlags: defined at src/prelude.isle line 407.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum ProducesFlags { pub enum ProducesFlags {
ProducesFlagsSideEffect { inst: MInst }, ProducesFlagsSideEffect { inst: MInst },
@@ -124,7 +130,7 @@ pub enum ProducesFlags {
ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg },
} }
/// Internal type ConsumesFlags: defined at src/prelude.isle line 396. /// Internal type ConsumesFlags: defined at src/prelude.isle line 418.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum ConsumesFlags { pub enum ConsumesFlags {
ConsumesFlagsReturnsResultWithProducer { ConsumesFlagsReturnsResultWithProducer {
@@ -140,6 +146,13 @@ pub enum ConsumesFlags {
inst2: MInst, inst2: MInst,
result: ValueRegs, result: ValueRegs,
}, },
ConsumesFlagsFourTimesReturnsValueRegs {
inst1: MInst,
inst2: MInst,
inst3: MInst,
inst4: MInst,
result: ValueRegs,
},
} }
/// Internal type MInst: defined at src/isa/aarch64/inst.isle line 2. /// Internal type MInst: defined at src/isa/aarch64/inst.isle line 2.
@@ -1050,7 +1063,7 @@ pub fn constructor_side_effect<C: Context>(
inst: ref pattern1_0, inst: ref pattern1_0,
} = pattern0_0 } = pattern0_0
{ {
// Rule at src/prelude.isle line 368. // Rule at src/prelude.isle line 390.
let expr0_0 = C::emit(ctx, pattern1_0); let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx); let expr1_0 = C::output_none(ctx);
return Some(expr1_0); return Some(expr1_0);
@@ -1068,7 +1081,7 @@ pub fn constructor_safepoint<C: Context>(
inst: ref pattern1_0, inst: ref pattern1_0,
} = pattern0_0 } = pattern0_0
{ {
// Rule at src/prelude.isle line 374. // Rule at src/prelude.isle line 396.
let expr0_0 = C::emit_safepoint(ctx, pattern1_0); let expr0_0 = C::emit_safepoint(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx); let expr1_0 = C::output_none(ctx);
return Some(expr1_0); return Some(expr1_0);
@@ -1076,6 +1089,55 @@ pub fn constructor_safepoint<C: Context>(
return None; return None;
} }
/// Generated as internal constructor for term produces_flags_get_reg.
///
/// Yields the `result` register stored in a
/// `ProducesFlags::ProducesFlagsReturnsReg`; any other variant produces
/// `None`. `ctx` is accepted but not used by this constructor.
pub fn constructor_produces_flags_get_reg<C: Context>(
    ctx: &mut C,
    arg0: &ProducesFlags,
) -> Option<Reg> {
    match *arg0 {
        // Rule at src/prelude.isle line 434.
        ProducesFlags::ProducesFlagsReturnsReg { result, .. } => Some(result),
        _ => None,
    }
}
/// Generated as internal constructor for term produces_flags_ignore.
///
/// Re-wraps the instruction of a result-carrying producer
/// (`ProducesFlagsReturnsReg` or `ProducesFlagsReturnsResultWithConsumer`) as a
/// `ProducesFlagsSideEffect`, dropping the result register. Any other variant
/// produces `None`. `ctx` is accepted but not used by this constructor.
pub fn constructor_produces_flags_ignore<C: Context>(
    ctx: &mut C,
    arg0: &ProducesFlags,
) -> Option<ProducesFlags> {
    let producer_inst = match arg0 {
        // Rule at src/prelude.isle line 439.
        ProducesFlags::ProducesFlagsReturnsReg { inst, .. } => inst,
        // Rule at src/prelude.isle line 441.
        ProducesFlags::ProducesFlagsReturnsResultWithConsumer { inst, .. } => inst,
        _ => return None,
    };
    Some(ProducesFlags::ProducesFlagsSideEffect {
        inst: producer_inst.clone(),
    })
}
// Generated as internal constructor for term consumes_flags_concat. // Generated as internal constructor for term consumes_flags_concat.
pub fn constructor_consumes_flags_concat<C: Context>( pub fn constructor_consumes_flags_concat<C: Context>(
ctx: &mut C, ctx: &mut C,
@@ -1094,7 +1156,7 @@ pub fn constructor_consumes_flags_concat<C: Context>(
result: pattern3_1, result: pattern3_1,
} = pattern2_0 } = pattern2_0
{ {
// Rule at src/prelude.isle line 408. // Rule at src/prelude.isle line 448.
let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs {
inst1: pattern1_0.clone(), inst1: pattern1_0.clone(),
@@ -1124,7 +1186,7 @@ pub fn constructor_with_flags<C: Context>(
inst: ref pattern3_0, inst: ref pattern3_0,
result: pattern3_1, result: pattern3_1,
} => { } => {
// Rule at src/prelude.isle line 433. // Rule at src/prelude.isle line 473.
let expr0_0 = C::emit(ctx, pattern1_0); let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0); let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_reg(ctx, pattern3_1); let expr2_0 = C::value_reg(ctx, pattern3_1);
@@ -1135,12 +1197,27 @@ pub fn constructor_with_flags<C: Context>(
inst2: ref pattern3_1, inst2: ref pattern3_1,
result: pattern3_2, result: pattern3_2,
} => { } => {
// Rule at src/prelude.isle line 439. // Rule at src/prelude.isle line 479.
let expr0_0 = C::emit(ctx, pattern1_0); let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_1); let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1);
return Some(pattern3_2); return Some(pattern3_2);
} }
&ConsumesFlags::ConsumesFlagsFourTimesReturnsValueRegs {
inst1: ref pattern3_0,
inst2: ref pattern3_1,
inst3: ref pattern3_2,
inst4: ref pattern3_3,
result: pattern3_4,
} => {
// Rule at src/prelude.isle line 491.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
let expr3_0 = C::emit(ctx, pattern3_2);
let expr4_0 = C::emit(ctx, pattern3_3);
return Some(pattern3_4);
}
_ => {} _ => {}
} }
} }
@@ -1154,7 +1231,7 @@ pub fn constructor_with_flags<C: Context>(
result: pattern3_1, result: pattern3_1,
} = pattern2_0 } = pattern2_0
{ {
// Rule at src/prelude.isle line 427. // Rule at src/prelude.isle line 467.
let expr0_0 = C::emit(ctx, pattern1_0); let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0); let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1174,7 +1251,7 @@ pub fn constructor_with_flags_reg<C: Context>(
) -> Option<Reg> { ) -> Option<Reg> {
let pattern0_0 = arg0; let pattern0_0 = arg0;
let pattern1_0 = arg1; let pattern1_0 = arg1;
// Rule at src/prelude.isle line 452. // Rule at src/prelude.isle line 508.
let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?;
let expr1_0: usize = 0; let expr1_0: usize = 0;
let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03 src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 9830498351ddf6a3 src/prelude.isle 6b0160bfcac86902
src/isa/s390x/inst.isle d91a16074ab186a8 src/isa/s390x/inst.isle d91a16074ab186a8
src/isa/s390x/lower.isle 1cc5a12adc8c75f9 src/isa/s390x/lower.isle 1cc5a12adc8c75f9

View File

@@ -39,8 +39,14 @@ pub trait Context {
fn u8_as_u64(&mut self, arg0: u8) -> u64; fn u8_as_u64(&mut self, arg0: u8) -> u64;
fn u16_as_u64(&mut self, arg0: u16) -> u64; fn u16_as_u64(&mut self, arg0: u16) -> u64;
fn u32_as_u64(&mut self, arg0: u32) -> u64; fn u32_as_u64(&mut self, arg0: u32) -> u64;
fn i64_as_u64(&mut self, arg0: i64) -> u64;
fn u64_add(&mut self, arg0: u64, arg1: u64) -> u64;
fn u64_sub(&mut self, arg0: u64, arg1: u64) -> u64;
fn u64_and(&mut self, arg0: u64, arg1: u64) -> u64;
fn ty_bits(&mut self, arg0: Type) -> u8; fn ty_bits(&mut self, arg0: Type) -> u8;
fn ty_bits_u16(&mut self, arg0: Type) -> u16; fn ty_bits_u16(&mut self, arg0: Type) -> u16;
fn ty_bits_u64(&mut self, arg0: Type) -> u64;
fn ty_mask(&mut self, arg0: Type) -> u64;
fn ty_bytes(&mut self, arg0: Type) -> u16; fn ty_bytes(&mut self, arg0: Type) -> u16;
fn lane_type(&mut self, arg0: Type) -> Type; fn lane_type(&mut self, arg0: Type) -> Type;
fn fits_in_16(&mut self, arg0: Type) -> Option<Type>; fn fits_in_16(&mut self, arg0: Type) -> Option<Type>;
@@ -144,13 +150,13 @@ pub trait Context {
fn same_reg(&mut self, arg0: Reg, arg1: WritableReg) -> Option<()>; fn same_reg(&mut self, arg0: Reg, arg1: WritableReg) -> Option<()>;
} }
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 363. /// Internal type SideEffectNoResult: defined at src/prelude.isle line 385.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum SideEffectNoResult { pub enum SideEffectNoResult {
Inst { inst: MInst }, Inst { inst: MInst },
} }
/// Internal type ProducesFlags: defined at src/prelude.isle line 385. /// Internal type ProducesFlags: defined at src/prelude.isle line 407.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum ProducesFlags { pub enum ProducesFlags {
ProducesFlagsSideEffect { inst: MInst }, ProducesFlagsSideEffect { inst: MInst },
@@ -158,7 +164,7 @@ pub enum ProducesFlags {
ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg },
} }
/// Internal type ConsumesFlags: defined at src/prelude.isle line 396. /// Internal type ConsumesFlags: defined at src/prelude.isle line 418.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum ConsumesFlags { pub enum ConsumesFlags {
ConsumesFlagsReturnsResultWithProducer { ConsumesFlagsReturnsResultWithProducer {
@@ -174,6 +180,13 @@ pub enum ConsumesFlags {
inst2: MInst, inst2: MInst,
result: ValueRegs, result: ValueRegs,
}, },
ConsumesFlagsFourTimesReturnsValueRegs {
inst1: MInst,
inst2: MInst,
inst3: MInst,
inst4: MInst,
result: ValueRegs,
},
} }
/// Internal type MInst: defined at src/isa/s390x/inst.isle line 2. /// Internal type MInst: defined at src/isa/s390x/inst.isle line 2.
@@ -941,7 +954,7 @@ pub fn constructor_side_effect<C: Context>(
inst: ref pattern1_0, inst: ref pattern1_0,
} = pattern0_0 } = pattern0_0
{ {
// Rule at src/prelude.isle line 368. // Rule at src/prelude.isle line 390.
let expr0_0 = C::emit(ctx, pattern1_0); let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx); let expr1_0 = C::output_none(ctx);
return Some(expr1_0); return Some(expr1_0);
@@ -959,7 +972,7 @@ pub fn constructor_safepoint<C: Context>(
inst: ref pattern1_0, inst: ref pattern1_0,
} = pattern0_0 } = pattern0_0
{ {
// Rule at src/prelude.isle line 374. // Rule at src/prelude.isle line 396.
let expr0_0 = C::emit_safepoint(ctx, pattern1_0); let expr0_0 = C::emit_safepoint(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx); let expr1_0 = C::output_none(ctx);
return Some(expr1_0); return Some(expr1_0);
@@ -967,6 +980,55 @@ pub fn constructor_safepoint<C: Context>(
return None; return None;
} }
/// Generated as internal constructor for term produces_flags_get_reg.
///
/// Yields the `result` register stored in a
/// `ProducesFlags::ProducesFlagsReturnsReg`; any other variant produces
/// `None`. `ctx` is accepted but not used by this constructor.
pub fn constructor_produces_flags_get_reg<C: Context>(
    ctx: &mut C,
    arg0: &ProducesFlags,
) -> Option<Reg> {
    match *arg0 {
        // Rule at src/prelude.isle line 434.
        ProducesFlags::ProducesFlagsReturnsReg { result, .. } => Some(result),
        _ => None,
    }
}
/// Generated as internal constructor for term produces_flags_ignore.
///
/// Re-wraps the instruction of a result-carrying producer
/// (`ProducesFlagsReturnsReg` or `ProducesFlagsReturnsResultWithConsumer`) as a
/// `ProducesFlagsSideEffect`, dropping the result register. Any other variant
/// produces `None`. `ctx` is accepted but not used by this constructor.
pub fn constructor_produces_flags_ignore<C: Context>(
    ctx: &mut C,
    arg0: &ProducesFlags,
) -> Option<ProducesFlags> {
    let producer_inst = match arg0 {
        // Rule at src/prelude.isle line 439.
        ProducesFlags::ProducesFlagsReturnsReg { inst, .. } => inst,
        // Rule at src/prelude.isle line 441.
        ProducesFlags::ProducesFlagsReturnsResultWithConsumer { inst, .. } => inst,
        _ => return None,
    };
    Some(ProducesFlags::ProducesFlagsSideEffect {
        inst: producer_inst.clone(),
    })
}
// Generated as internal constructor for term consumes_flags_concat. // Generated as internal constructor for term consumes_flags_concat.
pub fn constructor_consumes_flags_concat<C: Context>( pub fn constructor_consumes_flags_concat<C: Context>(
ctx: &mut C, ctx: &mut C,
@@ -985,7 +1047,7 @@ pub fn constructor_consumes_flags_concat<C: Context>(
result: pattern3_1, result: pattern3_1,
} = pattern2_0 } = pattern2_0
{ {
// Rule at src/prelude.isle line 408. // Rule at src/prelude.isle line 448.
let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs {
inst1: pattern1_0.clone(), inst1: pattern1_0.clone(),
@@ -1015,7 +1077,7 @@ pub fn constructor_with_flags<C: Context>(
inst: ref pattern3_0, inst: ref pattern3_0,
result: pattern3_1, result: pattern3_1,
} => { } => {
// Rule at src/prelude.isle line 433. // Rule at src/prelude.isle line 473.
let expr0_0 = C::emit(ctx, pattern1_0); let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0); let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_reg(ctx, pattern3_1); let expr2_0 = C::value_reg(ctx, pattern3_1);
@@ -1026,12 +1088,27 @@ pub fn constructor_with_flags<C: Context>(
inst2: ref pattern3_1, inst2: ref pattern3_1,
result: pattern3_2, result: pattern3_2,
} => { } => {
// Rule at src/prelude.isle line 439. // Rule at src/prelude.isle line 479.
let expr0_0 = C::emit(ctx, pattern1_0); let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_1); let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1);
return Some(pattern3_2); return Some(pattern3_2);
} }
&ConsumesFlags::ConsumesFlagsFourTimesReturnsValueRegs {
inst1: ref pattern3_0,
inst2: ref pattern3_1,
inst3: ref pattern3_2,
inst4: ref pattern3_3,
result: pattern3_4,
} => {
// Rule at src/prelude.isle line 491.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
let expr3_0 = C::emit(ctx, pattern3_2);
let expr4_0 = C::emit(ctx, pattern3_3);
return Some(pattern3_4);
}
_ => {} _ => {}
} }
} }
@@ -1045,7 +1122,7 @@ pub fn constructor_with_flags<C: Context>(
result: pattern3_1, result: pattern3_1,
} = pattern2_0 } = pattern2_0
{ {
// Rule at src/prelude.isle line 427. // Rule at src/prelude.isle line 467.
let expr0_0 = C::emit(ctx, pattern1_0); let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0); let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1065,7 +1142,7 @@ pub fn constructor_with_flags_reg<C: Context>(
) -> Option<Reg> { ) -> Option<Reg> {
let pattern0_0 = arg0; let pattern0_0 = arg0;
let pattern1_0 = arg1; let pattern1_0 = arg1;
// Rule at src/prelude.isle line 452. // Rule at src/prelude.isle line 508.
let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?;
let expr1_0: usize = 0; let expr1_0: usize = 0;
let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);

View File

@@ -159,15 +159,6 @@
(alternative Gpr) (alternative Gpr)
(dst WritableGpr)) (dst WritableGpr))
;; GPR conditional move with the `OR` of two conditions; overwrites
;; the destination register.
(CmoveOr (size OperandSize)
(cc1 CC)
(cc2 CC)
(consequent GprMem)
(alternative Gpr)
(dst WritableGpr))
;; XMM conditional move; overwrites the destination register. ;; XMM conditional move; overwrites the destination register.
(XmmCmove (size OperandSize) (XmmCmove (size OperandSize)
(cc CC) (cc CC)
@@ -175,15 +166,6 @@
(alternative Xmm) (alternative Xmm)
(dst WritableXmm)) (dst WritableXmm))
;; XMM conditional move with the `OR` of two conditions; overwrites
;; the destination register.
(XmmCmoveOr (size OperandSize)
(cc1 CC)
(cc2 CC)
(consequent XmmMem)
(alternative Xmm)
(dst WritableXmm))
;; ========================================= ;; =========================================
;; Stack manipulation. ;; Stack manipulation.
@@ -1074,6 +1056,18 @@
(decl avx512f_enabled () Type) (decl avx512f_enabled () Type)
(extern extractor avx512f_enabled avx512f_enabled) (extern extractor avx512f_enabled avx512f_enabled)
;; Zero-argument extractors over `Type`, each bound to an external
;; implementation of the same name.
;; NOTE(review): presumably each one matches only when the corresponding ISA
;; feature/flag is available on the target -- confirm against the external
;; (Rust-side) implementations, which are not visible here.
(decl avx512bitalg_enabled () Type)
(extern extractor avx512bitalg_enabled avx512bitalg_enabled)
(decl use_lzcnt () Type)
(extern extractor use_lzcnt use_lzcnt)
(decl use_bmi1 () Type)
(extern extractor use_bmi1 use_bmi1)
(decl use_popcnt () Type)
(extern extractor use_popcnt use_popcnt)
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
;; Extract a constant `Imm8Reg.Imm8` from a value operand. ;; Extract a constant `Imm8Reg.Imm8` from a value operand.
@@ -1266,6 +1260,13 @@
(xmm_unary_rm_r (SseOpcode.Movdqu) (xmm_unary_rm_r (SseOpcode.Movdqu)
addr)) addr))
;; Load a constant into an XMM register: emits an `MInst.XmmLoadConst` that
;; writes a fresh XMM temporary, and returns that temporary.
(decl xmm_load_const (Type VCodeConstant) Xmm)
(rule (xmm_load_const ty vconst)
      (let ((result WritableXmm (temp_writable_xmm))
            (_ Unit (emit (MInst.XmmLoadConst vconst result ty))))
        result))
;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; These constructors create SSA-style `MInst`s. It is their responsibility to ;; These constructors create SSA-style `MInst`s. It is their responsibility to
@@ -1398,6 +1399,13 @@
(imm $I64 bits) (imm $I64 bits)
(OperandSize.Size64))) (OperandSize.Size64)))
;; Helper for emitting immediates given as an `i64`. ISLE integer literals are
;; always parsed as `i64`, so this is the entry point that lets negative
;; numbers be written as immediates: the value is reinterpreted with
;; `i64_as_u64` and handed to `imm`.
(decl imm_i64 (Type i64) Reg)
(rule (imm_i64 ty signed)
      (let ((bits u64 (i64_as_u64 signed)))
        (imm ty bits)))
(decl nonzero_u64_fits_in_u32 (u64) u64) (decl nonzero_u64_fits_in_u32 (u64) u64)
(extern extractor nonzero_u64_fits_in_u32 nonzero_u64_fits_in_u32) (extern extractor nonzero_u64_fits_in_u32 nonzero_u64_fits_in_u32)
@@ -1504,6 +1512,11 @@
(rule (cmp size src1 src2) (rule (cmp size src1 src2)
(cmp_rmi_r size (CmpOpcode.Cmp) src1 src2)) (cmp_rmi_r size (CmpOpcode.Cmp) src1 src2))
;; Helper for creating `cmp` instructions whose first operand is a `u32`
;; immediate: wraps the immediate in `RegMemImm.Imm` and defers to `cmp_rmi_r`
;; with `CmpOpcode.Cmp`.
(decl cmp_imm (OperandSize u32 Gpr) ProducesFlags)
(rule (cmp_imm size imm_operand reg_operand)
      (cmp_rmi_r size
                 (CmpOpcode.Cmp)
                 (RegMemImm.Imm imm_operand)
                 reg_operand))
;; Helper for creating `MInst.XmmCmpRmR` instructions. ;; Helper for creating `MInst.XmmCmpRmR` instructions.
(decl xmm_cmp_rm_r (SseOpcode XmmMem Xmm) ProducesFlags) (decl xmm_cmp_rm_r (SseOpcode XmmMem Xmm) ProducesFlags)
(rule (xmm_cmp_rm_r opcode src1 src2) (rule (xmm_cmp_rm_r opcode src1 src2)
@@ -1579,17 +1592,25 @@
(decl cmove_or (Type CC CC GprMem Gpr) ConsumesFlags) (decl cmove_or (Type CC CC GprMem Gpr) ConsumesFlags)
(rule (cmove_or ty cc1 cc2 consequent alternative) (rule (cmove_or ty cc1 cc2 consequent alternative)
(let ((dst WritableGpr (temp_writable_gpr)) (let ((dst WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty))) (tmp WritableGpr (temp_writable_gpr))
(ConsumesFlags.ConsumesFlagsReturnsReg (size OperandSize (operand_size_of_type_32_64 ty))
(MInst.CmoveOr size cc1 cc2 consequent alternative dst) (cmove1 MInst (MInst.Cmove size cc1 consequent alternative tmp))
(cmove2 MInst (MInst.Cmove size cc2 consequent tmp dst)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
cmove1
cmove2
dst))) dst)))
(decl cmove_or_xmm (Type CC CC XmmMem Xmm) ConsumesFlags) (decl cmove_or_xmm (Type CC CC XmmMem Xmm) ConsumesFlags)
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative) (rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm)) (let ((dst WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty))) (tmp WritableXmm (temp_writable_xmm))
(ConsumesFlags.ConsumesFlagsReturnsReg (size OperandSize (operand_size_of_type_32_64 ty))
(MInst.XmmCmoveOr size cc1 cc2 consequent alternative dst) (cmove1 MInst (MInst.XmmCmove size cc1 consequent alternative tmp))
(cmove2 MInst (MInst.XmmCmove size cc2 consequent tmp dst)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
cmove1
cmove2
dst))) dst)))
;; Helper for creating `cmove_or` instructions directly from values. This allows ;; Helper for creating `cmove_or` instructions directly from values. This allows
@@ -1601,12 +1622,18 @@
(alt ValueRegs alternative) (alt ValueRegs alternative)
(dst1 WritableGpr (temp_writable_gpr)) (dst1 WritableGpr (temp_writable_gpr))
(dst2 WritableGpr (temp_writable_gpr)) (dst2 WritableGpr (temp_writable_gpr))
(tmp1 WritableGpr (temp_writable_gpr))
(tmp2 WritableGpr (temp_writable_gpr))
(size OperandSize (OperandSize.Size64)) (size OperandSize (OperandSize.Size64))
;; Each 64-bit half is selected by a cmove pair that ORs the two conditions:
;; the first cmove tests `cc1` into a temporary, and the second tests `cc2`
;; with that temporary as its alternative.
(lower_cmove MInst (MInst.CmoveOr size cc1 cc2 (value_regs_get_gpr cons 0) (value_regs_get_gpr alt 0) dst1)) (cmove1 MInst (MInst.Cmove size cc1 (value_regs_get_gpr cons 0) (value_regs_get_gpr alt 0) tmp1))
(upper_cmove MInst (MInst.CmoveOr size cc1 cc2 (value_regs_get_gpr cons 1) (value_regs_get_gpr alt 1) dst2))) (cmove2 MInst (MInst.Cmove size cc2 (value_regs_get_gpr cons 0) tmp1 dst1))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs (cmove3 MInst (MInst.Cmove size cc1 (value_regs_get_gpr cons 1) (value_regs_get_gpr alt 1) tmp2))
lower_cmove (cmove4 MInst (MInst.Cmove size cc2 (value_regs_get_gpr cons 1) tmp2 dst2)))
upper_cmove (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs
cmove1
cmove2
cmove3
cmove4
(value_regs dst1 dst2)))) (value_regs dst1 dst2))))
(rule (cmove_or_from_values (is_gpr_type (is_single_register_type ty)) cc1 cc2 consequent alternative) (rule (cmove_or_from_values (is_gpr_type (is_single_register_type ty)) cc1 cc2 consequent alternative)
@@ -1615,6 +1642,14 @@
(rule (cmove_or_from_values (is_xmm_type (is_single_register_type ty)) cc1 cc2 consequent alternative) (rule (cmove_or_from_values (is_xmm_type (is_single_register_type ty)) cc1 cc2 consequent alternative)
(cmove_or_xmm ty cc1 cc2 consequent alternative)) (cmove_or_xmm ty cc1 cc2 consequent alternative))
;; Helper for creating `MInst.Setcc` instructions. Nothing is emitted here: the
;; instruction and its fresh destination temporary are packaged as a
;; `ConsumesFlags.ConsumesFlagsReturnsReg` for a flags producer to be paired
;; with.
(decl setcc (CC) ConsumesFlags)
(rule (setcc cond)
      (let ((result WritableGpr (temp_writable_gpr)))
        (ConsumesFlags.ConsumesFlagsReturnsReg
         (MInst.Setcc cond result)
         result)))
;; Helper for creating `MInst.MovzxRmR` instructions. ;; Helper for creating `MInst.MovzxRmR` instructions.
(decl movzx (Type ExtMode GprMem) Gpr) (decl movzx (Type ExtMode GprMem) Gpr)
(rule (movzx ty mode src) (rule (movzx ty mode src)
@@ -2027,6 +2062,16 @@
size)))) size))))
dst)) dst))
;; Helper for creating `pshufb` instructions: emits an `MInst.XmmRmR` with
;; `SseOpcode.Pshufb` over the two operands, writing a fresh XMM temporary
;; that is returned.
(decl pshufb (Xmm XmmMem) Xmm)
(rule (pshufb lhs rhs)
      (let ((result WritableXmm (temp_writable_xmm))
            (_ Unit (emit (MInst.XmmRmR (SseOpcode.Pshufb) lhs rhs result))))
        result))
;; Helper for creating `MInst.XmmUnaryRmR` instructions. ;; Helper for creating `MInst.XmmUnaryRmR` instructions.
(decl xmm_unary_rm_r (SseOpcode XmmMem) Xmm) (decl xmm_unary_rm_r (SseOpcode XmmMem) Xmm)
(rule (xmm_unary_rm_r op src) (rule (xmm_unary_rm_r op src)
@@ -2071,6 +2116,11 @@
(rule (vpabsq src) (rule (vpabsq src)
(xmm_unary_rm_r_evex (Avx512Opcode.Vpabsq) src)) (xmm_unary_rm_r_evex (Avx512Opcode.Vpabsq) src))
;; Helper for creating `vpopcntb` instructions; a thin wrapper that passes
;; `Avx512Opcode.Vpopcntb` to `xmm_unary_rm_r_evex`.
(decl vpopcntb (XmmMem) Xmm)
(rule (vpopcntb input)
      (xmm_unary_rm_r_evex (Avx512Opcode.Vpopcntb) input))
;; Helper for creating `MInst.XmmRmREvex` instructions. ;; Helper for creating `MInst.XmmRmREvex` instructions.
(decl xmm_rm_r_evex (Avx512Opcode XmmMem Xmm) Xmm) (decl xmm_rm_r_evex (Avx512Opcode XmmMem Xmm) Xmm)
(rule (xmm_rm_r_evex op src1 src2) (rule (xmm_rm_r_evex op src1 src2)
@@ -2221,6 +2271,70 @@
(rule (ud2 code) (rule (ud2 code)
(SideEffectNoResult.Inst (MInst.Ud2 code))) (SideEffectNoResult.Inst (MInst.Ud2 code)))
;; Helper for creating `lzcnt` instructions: emits an `MInst.UnaryRmR` with
;; `UnaryRmROpcode.Lzcnt` that reads `input` and writes a fresh temporary,
;; which is returned. The operand size comes from `ty` via
;; `operand_size_of_type_32_64`.
(decl lzcnt (Type Gpr) Gpr)
(rule (lzcnt ty input)
      (let ((width OperandSize (operand_size_of_type_32_64 ty))
            (result WritableGpr (temp_writable_gpr))
            (_ Unit (emit (MInst.UnaryRmR width (UnaryRmROpcode.Lzcnt) input result))))
        result))
;; Helper for creating `tzcnt` instructions: emits an `MInst.UnaryRmR` with
;; `UnaryRmROpcode.Tzcnt` that reads `input` and writes a fresh temporary,
;; which is returned. The operand size comes from `ty` via
;; `operand_size_of_type_32_64`.
(decl tzcnt (Type Gpr) Gpr)
(rule (tzcnt ty input)
      (let ((width OperandSize (operand_size_of_type_32_64 ty))
            (result WritableGpr (temp_writable_gpr))
            (_ Unit (emit (MInst.UnaryRmR width (UnaryRmROpcode.Tzcnt) input result))))
        result))
;; Helper for creating `bsr` instructions. Nothing is emitted here: the
;; `MInst.UnaryRmR` (opcode `UnaryRmROpcode.Bsr`) and its fresh destination
;; temporary are returned together as a `ProducesFlags.ProducesFlagsReturnsReg`.
(decl bsr (Type Gpr) ProducesFlags)
(rule (bsr ty input)
      (let ((width OperandSize (operand_size_of_type_32_64 ty))
            (result WritableGpr (temp_writable_gpr))
            (scan MInst (MInst.UnaryRmR width (UnaryRmROpcode.Bsr) input result)))
        (ProducesFlags.ProducesFlagsReturnsReg scan result)))
;; Helper for creating `bsr + cmov` instruction pairs that produce the
;; result of the `bsr`, or `alt` if the input was zero.
(decl bsr_or_else (Type Gpr Gpr) Gpr)
(rule (bsr_or_else ty src alt)
      (let ((scan ProducesFlags (bsr ty src))
            ;; The scan's result register has to be an operand of the cmove,
            ;; so it is pulled out by hand here; the producer is then handed
            ;; to `with_flags_reg` with its result ignored.
            (scan_result Gpr (produces_flags_get_reg scan))
            (select ConsumesFlags (cmove ty (CC.Z) alt scan_result)))
        (with_flags_reg (produces_flags_ignore scan) select)))
;; Helper for creating `bsf` instructions. Nothing is emitted here: the
;; `MInst.UnaryRmR` (opcode `UnaryRmROpcode.Bsf`) and its fresh destination
;; temporary are returned together as a `ProducesFlags.ProducesFlagsReturnsReg`.
(decl bsf (Type Gpr) ProducesFlags)
(rule (bsf ty input)
      (let ((width OperandSize (operand_size_of_type_32_64 ty))
            (result WritableGpr (temp_writable_gpr))
            (scan MInst (MInst.UnaryRmR width (UnaryRmROpcode.Bsf) input result)))
        (ProducesFlags.ProducesFlagsReturnsReg scan result)))
;; Helper for creating `bsf + cmov` instruction pairs that produce the
;; result of the `bsf`, or `alt` if the input was zero.
(decl bsf_or_else (Type Gpr Gpr) Gpr)
(rule (bsf_or_else ty src alt)
      (let ((scan ProducesFlags (bsf ty src))
            ;; The scan's result register has to be an operand of the cmove,
            ;; so it is pulled out by hand here; the producer is then handed
            ;; to `with_flags_reg` with its result ignored.
            (scan_result Gpr (produces_flags_get_reg scan))
            (select ConsumesFlags (cmove ty (CC.Z) alt scan_result)))
        (with_flags_reg (produces_flags_ignore scan) select)))
;; Helper for creating `popcnt` instructions: emits an `MInst.UnaryRmR` with
;; `UnaryRmROpcode.Popcnt` that reads `input` and writes a fresh temporary,
;; which is returned. The operand size comes from `ty` via
;; `operand_size_of_type_32_64`.
(decl x64_popcnt (Type Gpr) Gpr)
(rule (x64_popcnt ty input)
      (let ((width OperandSize (operand_size_of_type_32_64 ty))
            (result WritableGpr (temp_writable_gpr))
            (_ Unit (emit (MInst.UnaryRmR width (UnaryRmROpcode.Popcnt) input result))))
        result))
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(convert Gpr InstOutput output_gpr) (convert Gpr InstOutput output_gpr)
@@ -2241,6 +2355,8 @@
(convert Reg GprMemImm reg_to_gpr_mem_imm) (convert Reg GprMemImm reg_to_gpr_mem_imm)
(convert WritableGpr WritableReg writable_gpr_to_reg) (convert WritableGpr WritableReg writable_gpr_to_reg)
(convert WritableGpr Reg writable_gpr_to_r_reg) (convert WritableGpr Reg writable_gpr_to_r_reg)
(convert WritableGpr GprMem writable_gpr_to_gpr_mem)
(convert WritableGpr ValueRegs writable_gpr_to_value_regs)
(convert Xmm InstOutput output_xmm) (convert Xmm InstOutput output_xmm)
(convert Value Xmm put_in_xmm) (convert Value Xmm put_in_xmm)
@@ -2259,8 +2375,10 @@
(convert WritableXmm WritableReg writable_xmm_to_reg) (convert WritableXmm WritableReg writable_xmm_to_reg)
(convert WritableXmm Reg writable_xmm_to_r_reg) (convert WritableXmm Reg writable_xmm_to_r_reg)
(convert WritableXmm XmmMem writable_xmm_to_xmm_mem) (convert WritableXmm XmmMem writable_xmm_to_xmm_mem)
(convert WritableXmm ValueRegs writable_xmm_to_value_regs)
(convert Gpr Imm8Gpr gpr_to_imm8_gpr) (convert Gpr Imm8Gpr gpr_to_imm8_gpr)
(convert Imm8Reg Imm8Gpr imm8_reg_to_imm8_gpr)
(convert Amode SyntheticAmode amode_to_synthetic_amode) (convert Amode SyntheticAmode amode_to_synthetic_amode)
(convert SyntheticAmode GprMem synthetic_amode_to_gpr_mem) (convert SyntheticAmode GprMem synthetic_amode_to_gpr_mem)
@@ -2276,12 +2394,21 @@
(decl writable_gpr_to_r_reg (WritableGpr) Reg) (decl writable_gpr_to_r_reg (WritableGpr) Reg)
(rule (writable_gpr_to_r_reg w_gpr) (rule (writable_gpr_to_r_reg w_gpr)
(writable_reg_to_reg (writable_gpr_to_reg w_gpr))) (writable_reg_to_reg (writable_gpr_to_reg w_gpr)))
;; Conversion helper from `WritableGpr` to `GprMem`, implemented by passing the
;; writable register to `gpr_to_gpr_mem`.
;; NOTE(review): this appears to rely on an implicit `WritableGpr` -> `Gpr`
;; conversion for the argument -- confirm one is registered.
(decl writable_gpr_to_gpr_mem (WritableGpr) GprMem)
(rule (writable_gpr_to_gpr_mem writable)
      (gpr_to_gpr_mem writable))
;; Conversion helper from `WritableGpr` to `ValueRegs`, implemented by passing
;; the writable register to `value_reg`.
;; NOTE(review): this appears to rely on an implicit conversion of the
;; argument to the type `value_reg` expects -- confirm one is registered.
(decl writable_gpr_to_value_regs (WritableGpr) ValueRegs)
(rule (writable_gpr_to_value_regs writable)
      (value_reg writable))
(decl writable_xmm_to_r_reg (WritableXmm) Reg) (decl writable_xmm_to_r_reg (WritableXmm) Reg)
(rule (writable_xmm_to_r_reg w_xmm) (rule (writable_xmm_to_r_reg w_xmm)
(writable_reg_to_reg (writable_xmm_to_reg w_xmm))) (writable_reg_to_reg (writable_xmm_to_reg w_xmm)))
(decl writable_xmm_to_xmm_mem (WritableXmm) XmmMem) (decl writable_xmm_to_xmm_mem (WritableXmm) XmmMem)
(rule (writable_xmm_to_xmm_mem w_xmm) (rule (writable_xmm_to_xmm_mem w_xmm)
(xmm_to_xmm_mem (writable_xmm_to_xmm w_xmm))) (xmm_to_xmm_mem (writable_xmm_to_xmm w_xmm)))
;; Conversion helper from `WritableXmm` to `ValueRegs`, implemented by passing
;; the writable register to `value_reg`.
;; NOTE(review): this appears to rely on an implicit conversion of the
;; argument to the type `value_reg` expects -- confirm one is registered.
(decl writable_xmm_to_value_regs (WritableXmm) ValueRegs)
(rule (writable_xmm_to_value_regs writable)
      (value_reg writable))
(decl synthetic_amode_to_gpr_mem (SyntheticAmode) GprMem) (decl synthetic_amode_to_gpr_mem (SyntheticAmode) GprMem)
(rule (synthetic_amode_to_gpr_mem amode) (rule (synthetic_amode_to_gpr_mem amode)

View File

@@ -1104,33 +1104,6 @@ pub(crate) fn emit(
} }
} }
Inst::CmoveOr {
size,
cc1,
cc2,
consequent,
alternative,
dst,
} => {
let first_cmove = Inst::Cmove {
cc: *cc1,
size: *size,
consequent: consequent.clone(),
alternative: alternative.clone(),
dst: dst.clone(),
};
first_cmove.emit(sink, info, state);
let second_cmove = Inst::Cmove {
cc: *cc2,
size: *size,
consequent: consequent.clone(),
alternative: alternative.clone(),
dst: dst.clone(),
};
second_cmove.emit(sink, info, state);
}
Inst::XmmCmove { Inst::XmmCmove {
size, size,
cc, cc,
@@ -1159,39 +1132,6 @@ pub(crate) fn emit(
sink.bind_label(next); sink.bind_label(next);
} }
Inst::XmmCmoveOr {
size,
cc1,
cc2,
consequent,
alternative,
dst,
} => {
debug_assert_eq!(*alternative, dst.to_reg());
let op = if *size == OperandSize::Size64 {
SseOpcode::Movsd
} else {
SseOpcode::Movss
};
let second_test = sink.get_label();
let next_instruction = sink.get_label();
// Jump to second test if `cc1` is *not* set.
one_way_jmp(sink, cc1.invert(), next_instruction);
let inst =
Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
inst.emit(sink, info, state);
sink.bind_label(second_test);
// Jump to next instruction if `cc2` is *not* set.
one_way_jmp(sink, cc2.invert(), next_instruction);
let inst =
Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
inst.emit(sink, info, state);
sink.bind_label(next_instruction);
}
Inst::Push64 { src } => { Inst::Push64 { src } => {
if info.flags.enable_probestack() { if info.flags.enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);

View File

@@ -52,7 +52,6 @@ impl Inst {
| Inst::CallUnknown { .. } | Inst::CallUnknown { .. }
| Inst::CheckedDivOrRemSeq { .. } | Inst::CheckedDivOrRemSeq { .. }
| Inst::Cmove { .. } | Inst::Cmove { .. }
| Inst::CmoveOr { .. }
| Inst::CmpRmiR { .. } | Inst::CmpRmiR { .. }
| Inst::CvtFloatToSintSeq { .. } | Inst::CvtFloatToSintSeq { .. }
| Inst::CvtFloatToUintSeq { .. } | Inst::CvtFloatToUintSeq { .. }
@@ -89,7 +88,6 @@ impl Inst {
| Inst::Ud2 { .. } | Inst::Ud2 { .. }
| Inst::VirtualSPOffsetAdj { .. } | Inst::VirtualSPOffsetAdj { .. }
| Inst::XmmCmove { .. } | Inst::XmmCmove { .. }
| Inst::XmmCmoveOr { .. }
| Inst::XmmCmpRmR { .. } | Inst::XmmCmpRmR { .. }
| Inst::XmmLoadConst { .. } | Inst::XmmLoadConst { .. }
| Inst::XmmMinMaxSeq { .. } | Inst::XmmMinMaxSeq { .. }
@@ -141,6 +139,7 @@ impl Inst {
} }
} }
#[allow(dead_code)]
pub(crate) fn unary_rm_r( pub(crate) fn unary_rm_r(
size: OperandSize, size: OperandSize,
op: UnaryRmROpcode, op: UnaryRmROpcode,
@@ -906,12 +905,6 @@ impl Inst {
alternative, alternative,
dst, dst,
.. ..
}
| Inst::CmoveOr {
size,
alternative,
dst,
..
} => { } => {
if *alternative != dst.to_reg() { if *alternative != dst.to_reg() {
debug_assert!(alternative.is_virtual()); debug_assert!(alternative.is_virtual());
@@ -926,9 +919,6 @@ impl Inst {
} }
Inst::XmmCmove { Inst::XmmCmove {
alternative, dst, .. alternative, dst, ..
}
| Inst::XmmCmoveOr {
alternative, dst, ..
} => { } => {
if *alternative != dst.to_reg() { if *alternative != dst.to_reg() {
debug_assert!(alternative.is_virtual()); debug_assert!(alternative.is_virtual());
@@ -1619,27 +1609,6 @@ impl PrettyPrint for Inst {
show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes())
), ),
Inst::CmoveOr {
size,
cc1,
cc2,
consequent: src,
alternative: _,
dst,
} => {
let src = src.show_rru_sized(mb_rru, size.to_bytes());
let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes());
format!(
"{} {}, {}; {} {}, {}",
ljustify(format!("cmov{}{}", cc1.to_string(), suffix_bwlq(*size))),
src,
dst,
ljustify(format!("cmov{}{}", cc2.to_string(), suffix_bwlq(*size))),
src,
dst,
)
}
Inst::XmmCmove { Inst::XmmCmove {
size, size,
cc, cc,
@@ -1660,34 +1629,6 @@ impl PrettyPrint for Inst {
) )
} }
Inst::XmmCmoveOr {
size,
cc1,
cc2,
consequent: src,
dst,
..
} => {
let suffix = if *size == OperandSize::Size64 {
"sd"
} else {
"ss"
};
let src = src.show_rru_sized(mb_rru, size.to_bytes());
let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes());
format!(
"j{} $check; mov{} {}, {}; $check: j{} $next; mov{} {}, {}; $next",
cc1.invert().to_string(),
suffix,
src,
dst,
cc2.invert().to_string(),
suffix,
src,
dst,
)
}
Inst::Push64 { src } => { Inst::Push64 { src } => {
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru)) format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
} }
@@ -2086,11 +2027,6 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
consequent: src, consequent: src,
dst, dst,
.. ..
}
| Inst::CmoveOr {
consequent: src,
dst,
..
} => { } => {
src.get_regs_as_uses(collector); src.get_regs_as_uses(collector);
collector.add_mod(dst.to_writable_reg()); collector.add_mod(dst.to_writable_reg());
@@ -2099,11 +2035,6 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
consequent: src, consequent: src,
dst, dst,
.. ..
}
| Inst::XmmCmoveOr {
consequent: src,
dst,
..
} => { } => {
src.get_regs_as_uses(collector); src.get_regs_as_uses(collector);
collector.add_mod(dst.to_writable_reg()); collector.add_mod(dst.to_writable_reg());
@@ -2554,12 +2485,6 @@ pub(crate) fn x64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
ref mut dst, ref mut dst,
ref mut alternative, ref mut alternative,
.. ..
}
| Inst::CmoveOr {
consequent: ref mut src,
ref mut dst,
ref mut alternative,
..
} => { } => {
src.map_uses(mapper); src.map_uses(mapper);
dst.map_mod(mapper); dst.map_mod(mapper);
@@ -2570,12 +2495,6 @@ pub(crate) fn x64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
ref mut dst, ref mut dst,
ref mut alternative, ref mut alternative,
.. ..
}
| Inst::XmmCmoveOr {
consequent: ref mut src,
ref mut dst,
ref mut alternative,
..
} => { } => {
src.map_uses(mapper); src.map_uses(mapper);
dst.map_mod(mapper); dst.map_mod(mapper);

View File

@@ -1467,22 +1467,22 @@
;; - `CC.BE -> C = 1 OR Z = 1` (below or equal) ;; - `CC.BE -> C = 1 OR Z = 1` (below or equal)
;; - `CC.NBE -> C = 0 AND Z = 0` (not below or equal) ;; - `CC.NBE -> C = 0 AND Z = 0` (not below or equal)
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Ordered) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.Ordered) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NP) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.NP) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Unordered) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.Unordered) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.P) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.P) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThan) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.GreaterThan) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NBE) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.NBE) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThanOrEqual) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.GreaterThanOrEqual) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NB) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.NB) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThan) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrLessThan) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.B) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.B) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.BE) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.BE) x y)))
;; Certain FloatCC variants are implemented by flipping the operands of the ;; Certain FloatCC variants are implemented by flipping the operands of the
@@ -1496,16 +1496,16 @@
;; not `LT | UNO`. By flipping the operands AND inverting the comparison (e.g., ;; not `LT | UNO`. By flipping the operands AND inverting the comparison (e.g.,
;; to `CC.NBE`), we also avoid these unordered cases. ;; to `CC.NBE`), we also avoid these unordered cases.
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThan) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.LessThan) a b) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.NBE) x y))) (with_flags (fpcmp a b) (cmove_from_values ty (CC.NBE) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThanOrEqual) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.LessThanOrEqual) a b) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.NB) x y))) (with_flags (fpcmp a b) (cmove_from_values ty (CC.NB) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThan) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrGreaterThan) a b) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.B) x y))) (with_flags (fpcmp a b) (cmove_from_values ty (CC.B) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.BE) x y))) (with_flags (fpcmp a b) (cmove_from_values ty (CC.BE) x y)))
;; `FloatCC.Equal` and `FloatCC.NotEqual` can only be implemented with multiple ;; `FloatCC.Equal` and `FloatCC.NotEqual` can only be implemented with multiple
@@ -1521,8 +1521,341 @@
;; More details about the CLIF semantics for `fcmp` are available at ;; More details about the CLIF semantics for `fcmp` are available at
;; https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html#method.fcmp. ;; https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html#method.fcmp.
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Equal) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.Equal) a b) x y)))
(with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) y x))) (with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) y x)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.NotEqual) a b)) x y))) (rule (lower (has_type ty (select (fcmp (FloatCC.NotEqual) a b) x y)))
(with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y))) (with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y)))
;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; If available, we can use a plain lzcnt instruction here. Note no
;; special handling is required for zero inputs, because the machine
;; instruction does what the CLIF expects for zero, i.e. it returns
;; the operand width in bits.
;; 32/64-bit `clz` when `use_lzcnt` matches: a single `lzcnt`. This rule is
;; declared with priority 1, unlike the fallback rules that follow.
(rule 1 (lower
(has_type (and
(ty_32_or_64 ty)
(use_lzcnt))
(clz src)))
(lzcnt ty src))
;; 32/64-bit `clz` fallback: use the `do_clz` helper, with the operation
;; width and the original width both equal to `ty`.
(rule (lower
(has_type (ty_32_or_64 ty)
(clz src)))
(do_clz ty ty src))
;; 8/16-bit `clz`: zero-extend the input to 32 bits and run `do_clz` at
;; `$I32`, passing the original narrow type so the count is relative to it.
(rule (lower
(has_type (ty_8_or_16 ty)
(clz src)))
(do_clz $I32 ty (extend_to_gpr src $I32 (ExtendKind.Zero))))
;; 128-bit `clz`: run `do_clz` on each 64-bit register of `src` separately.
;; `upper` is the count for register 1; `lower` is the count for register 0
;; plus 64. `upper` is compared against 64 and a `cmove` on `CC.NZ` chooses
;; between `upper` and `lower` (presumably `upper` unless it equals 64,
;; i.e. unless register 1 is all zeros -- confirm against `cmove`). The
;; second register of the result is the constant 0.
(rule (lower
(has_type $I128
(clz src)))
(let ((upper Gpr (do_clz $I64 $I64 (value_regs_get_gpr src 1)))
(lower Gpr (add $I64
(do_clz $I64 $I64 (value_regs_get_gpr src 0))
(RegMemImm.Imm 64)))
(result_lo Gpr
(with_flags_reg
(cmp_imm (OperandSize.Size64) 64 upper)
(cmove $I64 (CC.NZ) upper lower))))
(value_regs result_lo (imm $I64 0))))
;; Implementation helper for clz; operates on 32 or 64-bit units.
;;
;; `ty` is the width at which the bit scan and the subtraction are
;; performed; `orig_ty` is the type whose bit width the count is relative
;; to. The two differ for 8/16-bit inputs, which the caller zero-extends to
;; 32 bits first.
;;
;; The result is `(bits(orig_ty) - 1) - highest_bit_index`. `bsr_or_else`
;; is given -1 as its alternative value, presumably used when `src` is zero
;; so that the result becomes `bits(orig_ty)` -- confirm against
;; `bsr_or_else`.
(decl do_clz (Type Type Gpr) Gpr)
(rule (do_clz ty orig_ty src)
(let ((highest_bit_index Reg (bsr_or_else ty src (imm_i64 $I64 -1)))
(bits_minus_1 Reg (imm ty (u64_sub (ty_bits_u64 orig_ty) 1))))
(sub ty bits_minus_1 highest_bit_index)))
;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Analogous to `clz` cases above, but using mirror instructions
;; (tzcnt vs lzcnt, bsf vs bsr).
;; 32/64-bit `ctz` when `use_bmi1` matches: a single `tzcnt`. This rule is
;; declared with priority 1, unlike the fallback rules that follow.
(rule 1 (lower
(has_type (and
(ty_32_or_64 ty)
(use_bmi1))
(ctz src)))
(tzcnt ty src))
;; 32/64-bit `ctz` fallback: use the `do_ctz` helper, with the operation
;; width and the original width both equal to `ty`.
(rule (lower
(has_type (ty_32_or_64 ty)
(ctz src)))
(do_ctz ty ty src))
;; 8/16-bit `ctz`: zero-extend the input to 32 bits and run `do_ctz` at
;; `$I32`, passing the original narrow type so its bit width is used as the
;; alternative value.
(rule (lower
(has_type (ty_8_or_16 ty)
(ctz src)))
(do_ctz $I32 ty (extend_to_gpr src $I32 (ExtendKind.Zero))))
;; 128-bit `ctz`: run `do_ctz` on each 64-bit register of `src` separately.
;; `lower` is the count for register 0; `upper` is the count for register 1
;; plus 64. `lower` is compared against 64 and a `cmove` on `CC.Z` chooses
;; between `upper` and `lower` (presumably `upper` only when `lower` equals
;; 64, i.e. when register 0 is all zeros -- confirm against `cmove`). The
;; second register of the result is the constant 0.
(rule (lower
(has_type $I128
(ctz src)))
(let ((lower Gpr (do_ctz $I64 $I64 (value_regs_get_gpr src 0)))
(upper Gpr (add $I64
(do_ctz $I64 $I64 (value_regs_get_gpr src 1))
(RegMemImm.Imm 64)))
(result_lo Gpr
(with_flags_reg
(cmp_imm (OperandSize.Size64) 64 lower)
(cmove $I64 (CC.Z) upper lower))))
(value_regs result_lo (imm $I64 0))))
;; Implementation helper for ctz, mirroring `do_clz`.
;;
;; `ty` is the width at which the bit scan is performed; `orig_ty` supplies
;; the bit width that is passed to `bsf_or_else` as its alternative value
;; (presumably the result when `src` is zero -- confirm against
;; `bsf_or_else`).
(decl do_ctz (Type Type Gpr) Gpr)
(rule (do_ctz ty orig_ty src)
(bsf_or_else ty src (imm $I64 (ty_bits_u64 orig_ty))))
;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The first three rules apply when `use_popcnt` matches and are declared
;; with priority 1; the last three are the fallbacks built on `do_popcnt`.

;; 32/64-bit with `use_popcnt`: a single `x64_popcnt`.
(rule 1 (lower
(has_type (and
(ty_32_or_64 ty)
(use_popcnt))
(popcnt src)))
(x64_popcnt ty src))
;; 8/16-bit with `use_popcnt`: zero-extend to 32 bits, then count at `$I32`.
(rule 1 (lower
(has_type (and
(ty_8_or_16 ty)
(use_popcnt))
(popcnt src)))
(x64_popcnt $I32 (extend_to_gpr src $I32 (ExtendKind.Zero))))
;; 128-bit with `use_popcnt`: count each 64-bit register and add the two
;; counts; the second register of the result is the constant 0.
(rule 1 (lower
(has_type (and
$I128
(use_popcnt))
(popcnt src)))
(let ((lo_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 0)))
(hi_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 1))))
(value_regs (add $I64 lo_count hi_count) (imm $I64 0))))
;; 32/64-bit fallback: the `do_popcnt` instruction sequence.
(rule (lower
(has_type (ty_32_or_64 ty)
(popcnt src)))
(do_popcnt ty src))
;; 8/16-bit fallback: zero-extend to 32 bits, then `do_popcnt` at `$I32`.
(rule (lower
(has_type (ty_8_or_16 ty)
(popcnt src)))
(do_popcnt $I32 (extend_to_gpr src $I32 (ExtendKind.Zero))))
;; 128-bit fallback: `do_popcnt` on each 64-bit register, counts added; the
;; second register of the result is the constant 0.
(rule (lower
(has_type $I128
(popcnt src)))
(let ((lo_count Gpr (do_popcnt $I64 (value_regs_get_gpr src 0)))
(hi_count Gpr (do_popcnt $I64 (value_regs_get_gpr src 1))))
(value_regs (add $I64 lo_count hi_count) (imm $I64 0))))
;; Implementation of popcount when we don't have a native popcount
;; instruction.
(decl do_popcnt (Type Gpr) Gpr)
;; 64-bit variant. Computes a per-nibble popcount by repeated subtraction,
;; folds adjacent nibbles into bytes, then sums the bytes with a multiply.
(rule (do_popcnt $I64 src)
(let ((shifted1 Gpr (shr $I64 src (Imm8Reg.Imm8 1)))
(sevens Gpr (imm $I64 0x7777777777777777))
(masked1 Gpr (x64_and $I64 shifted1 sevens))
;; diff1 := src - ((src >> 1) & 0b0111_0111_0111...)
(diff1 Gpr (sub $I64 src masked1))
(shifted2 Gpr (shr $I64 masked1 (Imm8Reg.Imm8 1)))
(masked2 Gpr (x64_and $I64 shifted2 sevens))
;; diff2 := diff1 - ((masked1 >> 1) & 0b0111_0111_0111...)
;;
;; Note that the shift is applied to the previous masked value
;; (`masked1`), not to `diff1`.
(diff2 Gpr (sub $I64 diff1 masked2))
(shifted3 Gpr (shr $I64 masked2 (Imm8Reg.Imm8 1)))
(masked3 Gpr (x64_and $I64 shifted3 sevens))
;; diff3 := diff2 - ((masked2 >> 1) & 0b0111_0111_0111...)
;;
;; At this point, each nibble of diff3 is the popcount of
;; that nibble. For a nibble with value x, the three masked
;; values are floor(x / 2), floor(x / 4) and floor(x / 8), so
;; diff3 holds x - floor(x / 2) - floor(x / 4) - floor(x / 8).
;; Writing x = 8*b3 + 4*b2 + 2*b1 + b0, that difference is
;; b3 + b2 + b1 + b0, i.e. the number of set bits.
(diff3 Gpr (sub $I64 diff2 masked3))
;; Add the two nibbles of each byte together.
(sum1 Gpr (add $I64
(shr $I64 diff3 (Imm8Reg.Imm8 4))
diff3))
;; Mask the above sum to have the popcount for each byte
;; in the lower nibble of that byte.
(ofof Gpr (imm $I64 0x0f0f0f0f0f0f0f0f))
(masked4 Gpr (x64_and $I64 sum1 ofof))
(ones Gpr (imm $I64 0x0101010101010101))
;; Use a multiply to sum all of the bytes' popcounts into
;; the top byte. Consider the binomial expansion for the
;; top byte: it is the sum of the bytes (masked4 >> 56) *
;; 0x01 + (masked4 >> 48) * 0x01 + (masked4 >> 40) * 0x01
;; + ... + (masked4 >> 0).
(mul Gpr (mul $I64 masked4 ones))
;; Now take that top byte and return it as the popcount.
(final Gpr (shr $I64 mul (Imm8Reg.Imm8 56))))
final))
;; This is the 32-bit version of the above; the steps for each nibble
;; are the same, we just use constants half as wide.
(rule (do_popcnt $I32 src)
(let ((shifted1 Gpr (shr $I32 src (Imm8Reg.Imm8 1)))
(sevens Gpr (imm $I32 0x77777777))
(masked1 Gpr (x64_and $I32 shifted1 sevens))
;; diff1 := src - ((src >> 1) & 0x77777777)
(diff1 Gpr (sub $I32 src masked1))
(shifted2 Gpr (shr $I32 masked1 (Imm8Reg.Imm8 1)))
(masked2 Gpr (x64_and $I32 shifted2 sevens))
;; diff2 := diff1 - ((masked1 >> 1) & 0x77777777)
(diff2 Gpr (sub $I32 diff1 masked2))
(shifted3 Gpr (shr $I32 masked2 (Imm8Reg.Imm8 1)))
(masked3 Gpr (x64_and $I32 shifted3 sevens))
;; diff3 := diff2 - ((masked2 >> 1) & 0x77777777)
(diff3 Gpr (sub $I32 diff2 masked3))
;; Add the two nibbles of each byte together.
(sum1 Gpr (add $I32
(shr $I32 diff3 (Imm8Reg.Imm8 4))
diff3))
;; Here the byte mask and the multiplier are passed as
;; immediates (`RegMemImm.Imm`) rather than being materialized
;; with `imm` first.
(masked4 Gpr (x64_and $I32 sum1 (RegMemImm.Imm 0x0f0f0f0f)))
(mul Gpr (mul $I32 masked4 (RegMemImm.Imm 0x01010101)))
;; The byte sum ends up in the top byte; shift it down by 24.
(final Gpr (shr $I32 mul (Imm8Reg.Imm8 24))))
final))
;; `$I8X16` popcnt when both `avx512vl_enabled` and `avx512bitalg_enabled`
;; match: a single `vpopcntb`. Declared with priority 1.
(rule 1 (lower (has_type (and
$I8X16
(avx512vl_enabled)
(avx512bitalg_enabled))
(popcnt src)))
(vpopcntb src))
;; For SSE 4.2 we use Mula's algorithm (https://arxiv.org/pdf/1611.07612.pdf):
;;
;; __m128i count_bytes ( __m128i v) {
;; __m128i lookup = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
;; __m128i low_mask = _mm_set1_epi8 (0x0f);
;; __m128i lo = _mm_and_si128 (v, low_mask);
;; __m128i hi = _mm_and_si128 (_mm_srli_epi16 (v, 4), low_mask);
;; __m128i cnt1 = _mm_shuffle_epi8 (lookup, lo);
;; __m128i cnt2 = _mm_shuffle_epi8 (lookup, hi);
;; return _mm_add_epi8 (cnt1, cnt2);
;; }
;;
;; Details of the above algorithm can be found in the reference noted above, but the basics
;; are to create a lookup table that pre populates the popcnt values for each number [0,15].
;; The algorithm uses shifts to isolate 4 bit sections of the vector, pshufb as part of the
;; lookup process, and adds together the results.
;;
;; __m128i lookup = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
(decl popcount_4bit_table () VCodeConstant) ;; bits-per-nibble table `lookup` above
(extern constructor popcount_4bit_table popcount_4bit_table)
(decl popcount_low_mask () VCodeConstant) ;; mask for low nibbles: 0x0f * 16
(extern constructor popcount_low_mask popcount_low_mask)
;; Fallback `$I8X16` popcnt: split every byte into its low and high nibble,
;; look both up in the 16-entry bits-per-nibble table with `pshufb`, and add
;; the two per-byte counts with `paddb`.
(rule (lower (has_type $I8X16
                       (popcnt src)))
      (let ((nibble_table_const VCodeConstant (popcount_4bit_table))
            (low_mask Xmm (xmm_load_const $I8X16 (popcount_low_mask)))
            (low_nibbles Xmm (sse_and $I8X16 src low_mask))
            ;; Note that this is a 16x8 shift, but that's OK; we mask
            ;; off anything that traverses from one byte to the next
            ;; with the `low_mask` loaded above.
            (shifted_src Xmm (psrlw src (RegMemImm.Imm 4)))
            (high_nibbles Xmm (sse_and $I8X16 shifted_src low_mask))
            ;; Reuse the table constant obtained at the top of the `let`
            ;; instead of requesting it a second time.
            (lookup Xmm (xmm_load_const $I8X16 nibble_table_const))
            (bit_counts_low Xmm (pshufb lookup low_nibbles))
            (bit_counts_high Xmm (pshufb lookup high_nibbles)))
        (paddb bit_counts_low bit_counts_high)))
;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 8-, 16- and 32-bit `bitrev` all run their helper at `$I32`; the helper
;; chosen (`do_bitrev8`/`16`/`32`) determines how many swap stages are
;; applied.
(rule (lower (has_type $I8 (bitrev src)))
(do_bitrev8 $I32 src))
(rule (lower (has_type $I16 (bitrev src)))
(do_bitrev16 $I32 src))
(rule (lower (has_type $I32 (bitrev src)))
(do_bitrev32 $I32 src))
(rule (lower (has_type $I64 (bitrev src)))
(do_bitrev64 $I64 src))
;; 128-bit `bitrev`: reverse each 64-bit register and exchange them, so the
;; reversed register 1 becomes result register 0 and vice versa.
(rule (lower (has_type $I128 (bitrev src)))
(value_regs
(do_bitrev64 $I64 (value_regs_get_gpr src 1))
(do_bitrev64 $I64 (value_regs_get_gpr src 0))))
;; Reverse the bit order within every byte of `src`, operating at width
;; `ty`. Three swap stages exchange adjacent 1-bit, 2-bit and 4-bit groups.
;; Each 64-bit mask pattern is truncated to the width of `ty` by ANDing it
;; with `ty_mask`.
(decl do_bitrev8 (Type Gpr) Gpr)
(rule (do_bitrev8 ty src)
(let ((tymask u64 (ty_mask ty))
;; Stage 1: swap neighbouring single bits.
(mask1 Gpr (imm ty (u64_and tymask 0x5555555555555555)))
(lo1 Gpr (x64_and ty src mask1))
(hi1 Gpr (x64_and ty (shr ty src (Imm8Reg.Imm8 1)) mask1))
(swap1 Gpr (or ty
(shl ty lo1 (Imm8Reg.Imm8 1))
hi1))
;; Stage 2: swap neighbouring 2-bit pairs.
(mask2 Gpr (imm ty (u64_and tymask 0x3333333333333333)))
(lo2 Gpr (x64_and ty swap1 mask2))
(hi2 Gpr (x64_and ty (shr ty swap1 (Imm8Reg.Imm8 2)) mask2))
(swap2 Gpr (or ty
(shl ty lo2 (Imm8Reg.Imm8 2))
hi2))
;; Stage 3: swap the two nibbles of each byte.
(mask4 Gpr (imm ty (u64_and tymask 0x0f0f0f0f0f0f0f0f)))
(lo4 Gpr (x64_and ty swap2 mask4))
(hi4 Gpr (x64_and ty (shr ty swap2 (Imm8Reg.Imm8 4)) mask4))
(swap4 Gpr (or ty
(shl ty lo4 (Imm8Reg.Imm8 4))
hi4)))
swap4))
;; Reverse the bit order within every 16-bit group of `src`, operating at
;; width `ty`: first reverse the bits inside each byte with `do_bitrev8`,
;; then swap the two bytes of each 16-bit group.
(decl do_bitrev16 (Type Gpr) Gpr)
(rule (do_bitrev16 ty src)
(let ((src_ Gpr (do_bitrev8 ty src))
(tymask u64 (ty_mask ty))
(mask8 Gpr (imm ty (u64_and tymask 0x00ff00ff00ff00ff)))
(lo8 Gpr (x64_and ty src_ mask8))
(hi8 Gpr (x64_and ty (shr ty src_ (Imm8Reg.Imm8 8)) mask8))
(swap8 Gpr (or ty
(shl ty lo8 (Imm8Reg.Imm8 8))
hi8)))
swap8))
;; Reverse the bit order within every 32-bit group of `src`, operating at
;; width `ty`: first apply `do_bitrev16`, then swap the two 16-bit halves
;; of each 32-bit group.
(decl do_bitrev32 (Type Gpr) Gpr)
(rule (do_bitrev32 ty src)
(let ((src_ Gpr (do_bitrev16 ty src))
(tymask u64 (ty_mask ty))
(mask16 Gpr (imm ty (u64_and tymask 0x0000ffff0000ffff)))
(lo16 Gpr (x64_and ty src_ mask16))
(hi16 Gpr (x64_and ty (shr ty src_ (Imm8Reg.Imm8 16)) mask16))
(swap16 Gpr (or ty
(shl ty lo16 (Imm8Reg.Imm8 16))
hi16)))
swap16))
;; Reverse the bit order of a 64-bit value: first apply `do_bitrev32`, then
;; swap the two 32-bit halves. The only rule matches `ty` against `$I64`.
(decl do_bitrev64 (Type Gpr) Gpr)
(rule (do_bitrev64 ty @ $I64 src)
(let ((src_ Gpr (do_bitrev32 ty src))
(mask32 Gpr (imm ty 0xffffffff))
(lo32 Gpr (x64_and ty src_ mask32))
;; No mask is applied to the shifted half, presumably because `shr`
;; is a logical shift that already leaves the upper 32 bits clear
;; -- confirm.
(hi32 Gpr (shr ty src_ (Imm8Reg.Imm8 32)))
(swap32 Gpr (or ty
(shl ty lo32 (Imm8Reg.Imm8 32))
hi32)))
swap32))
;; Rules for `is_null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Null references are represented by the constant value `0`.
(rule (lower (is_null src @ (value_type $R64)))
(with_flags
(cmp_imm (OperandSize.Size64) 0 src)
(setcc (CC.Z))))
;; Rules for `is_invalid` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Invalid references are represented by the constant value `-1`.
(rule (lower (is_invalid src @ (value_type $R64)))
(with_flags
(cmp_imm (OperandSize.Size64) 0xffffffff src) ;; simm32 0xffff_ffff is sign-extended to -1.
(setcc (CC.Z))))

File diff suppressed because it is too large Load Diff

View File

@@ -171,6 +171,42 @@ where
} }
} }
/// Yields `Some(())` when the `use_avx512bitalg_simd` ISA flag is set and
/// `None` otherwise. The `Type` argument is ignored.
#[inline]
fn avx512bitalg_enabled(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_avx512bitalg_simd().then(|| ())
}
/// Yields `Some(())` when the `use_lzcnt` ISA flag is set and `None`
/// otherwise. The `Type` argument is ignored.
#[inline]
fn use_lzcnt(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_lzcnt().then(|| ())
}
/// Yields `Some(())` when the `use_bmi1` ISA flag is set and `None`
/// otherwise. The `Type` argument is ignored.
#[inline]
fn use_bmi1(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_bmi1().then(|| ())
}
/// Yields `Some(())` when the `use_popcnt` ISA flag is set and `None`
/// otherwise. The `Type` argument is ignored.
#[inline]
fn use_popcnt(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_popcnt().then(|| ())
}
#[inline] #[inline]
fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> { fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
let inst = self.lower_ctx.dfg().value_def(val).inst()?; let inst = self.lower_ctx.dfg().value_def(val).inst()?;
@@ -326,6 +362,16 @@ where
SyntheticAmode::ConstantOffset(mask_table) SyntheticAmode::ConstantOffset(mask_table)
} }
/// Passes `POPCOUNT_4BIT_TABLE`, wrapped as well-known constant data, to
/// `lower_ctx.use_constant` and returns the resulting `VCodeConstant`.
fn popcount_4bit_table(&mut self) -> VCodeConstant {
    let table = VCodeConstantData::WellKnown(&POPCOUNT_4BIT_TABLE);
    self.lower_ctx.use_constant(table)
}
/// Passes `POPCOUNT_LOW_MASK`, wrapped as well-known constant data, to
/// `lower_ctx.use_constant` and returns the resulting `VCodeConstant`.
fn popcount_low_mask(&mut self) -> VCodeConstant {
    let mask = VCodeConstantData::WellKnown(&POPCOUNT_LOW_MASK);
    self.lower_ctx.use_constant(mask)
}
#[inline] #[inline]
fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm { fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
Writable::from_reg(Xmm::new(r.to_reg()).unwrap()) Writable::from_reg(Xmm::new(r.to_reg()).unwrap())
@@ -499,6 +545,18 @@ const I8X16_USHR_MASKS: [u8; 128] = [
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
]; ];
/// Population count of every 4-bit value: entry `i` holds the number of
/// set bits in the nibble `i`. Used in the vector implementation of
/// popcount.
#[rustfmt::skip] // Keep four entries per row.
const POPCOUNT_4BIT_TABLE: [u8; 16] = [
    0, 1, 1, 2,
    1, 2, 2, 3,
    1, 2, 2, 3,
    2, 3, 3, 4,
];
/// Sixteen bytes of `0x0f`: a mask selecting the low nibble of every byte.
/// Used alongside `POPCOUNT_4BIT_TABLE` in the vector implementation of
/// popcount.
const POPCOUNT_LOW_MASK: [u8; 16] = [0x0f; 16];
#[inline] #[inline]
fn to_simm32(constant: i64) -> Option<GprMemImm> { fn to_simm32(constant: i64) -> Option<GprMemImm> {
if constant == ((constant << 32) >> 32) { if constant == ((constant << 32) >> 32) {

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03 src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 9830498351ddf6a3 src/prelude.isle 6b0160bfcac86902
src/isa/x64/inst.isle 5ee89205e6e9a46b src/isa/x64/inst.isle 67eb719e568c2a81
src/isa/x64/lower.isle 348a808ea5de4cdb src/isa/x64/lower.isle 142626fe062fd7d7

File diff suppressed because it is too large Load Diff

View File

@@ -140,6 +140,26 @@ macro_rules! isle_prelude_methods {
x.into() x.into()
} }
/// Reinterprets an `i64` as a `u64` with an `as` cast: the bit pattern is
/// kept, so negative inputs become large unsigned values.
#[inline]
fn i64_as_u64(&mut self, x: i64) -> u64 {
x as u64
}
/// Adds two `u64` values, wrapping around on overflow instead of panicking.
#[inline]
fn u64_add(&mut self, x: u64, y: u64) -> u64 {
x.wrapping_add(y)
}
/// Computes `x - y` on `u64` values, wrapping around on underflow instead
/// of panicking.
#[inline]
fn u64_sub(&mut self, x: u64, y: u64) -> u64 {
x.wrapping_sub(y)
}
/// Bitwise AND of two `u64` values.
#[inline]
fn u64_and(&mut self, x: u64, y: u64) -> u64 {
x & y
}
#[inline] #[inline]
fn ty_bits(&mut self, ty: Type) -> u8 { fn ty_bits(&mut self, ty: Type) -> u8 {
use std::convert::TryInto; use std::convert::TryInto;
@@ -151,11 +171,28 @@ macro_rules! isle_prelude_methods {
ty.bits() ty.bits()
} }
/// Bit width of `ty`, widened to `u64`.
#[inline]
fn ty_bits_u64(&mut self, ty: Type) -> u64 {
    u64::from(ty.bits())
}
#[inline] #[inline]
fn ty_bytes(&mut self, ty: Type) -> u16 { fn ty_bytes(&mut self, ty: Type) -> u16 {
u16::try_from(ty.bytes()).unwrap() u16::try_from(ty.bytes()).unwrap()
} }
/// Returns a mask with the low `ty.bits()` bits set.
///
/// Only widths of 1, 8, 16, 32 and 64 bits are supported.
///
/// # Panics
///
/// Panics with `unimplemented!()` for any other bit width.
#[inline]
fn ty_mask(&mut self, ty: Type) -> u64 {
    let width = ty.bits();
    if !matches!(width, 1 | 8 | 16 | 32 | 64) {
        unimplemented!();
    }
    // `width` is in 1..=64 here, so the shift amount is in 0..=63.
    u64::MAX >> (64 - u32::from(width))
}
fn fits_in_16(&mut self, ty: Type) -> Option<Type> { fn fits_in_16(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 16 { if ty.bits() <= 16 {
Some(ty) Some(ty)

View File

@@ -167,6 +167,20 @@
(decl u32_as_u64 (u32) u64) (decl u32_as_u64 (u32) u64)
(extern constructor u32_as_u64 u32_as_u64) (extern constructor u32_as_u64 u32_as_u64)
;; Convert an `i64` to a `u64`; bound to the external Rust constructor
;; `i64_as_u64`.
(decl i64_as_u64 (i64) u64)
(extern constructor i64_as_u64 i64_as_u64)
;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Add two `u64` values; bound to the external Rust constructor `u64_add`.
(decl u64_add (u64 u64) u64)
(extern constructor u64_add u64_add)
;; Subtract the second `u64` from the first; bound to the external Rust
;; constructor `u64_sub`.
(decl u64_sub (u64 u64) u64)
(extern constructor u64_sub u64_sub)
;; Bitwise AND of two `u64` values; bound to the external Rust constructor
;; `u64_and`.
(decl u64_and (u64 u64) u64)
(extern constructor u64_and u64_and)
;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(extern const $B1 Type) (extern const $B1 Type)
@@ -209,6 +223,14 @@
(decl ty_bits_u16 (Type) u16) (decl ty_bits_u16 (Type) u16)
(extern constructor ty_bits_u16 ty_bits_u16) (extern constructor ty_bits_u16 ty_bits_u16)
;; Get the bit width of a given type as a `u64`.
(decl ty_bits_u64 (Type) u64)
(extern constructor ty_bits_u64 ty_bits_u64)
;; Get a mask for the width of a given type.
(decl ty_mask (Type) u64)
(extern constructor ty_mask ty_mask)
;; Get the byte width of a given type. ;; Get the byte width of a given type.
(decl ty_bytes (Type) u16) (decl ty_bytes (Type) u16)
(extern constructor ty_bytes ty_bytes) (extern constructor ty_bytes ty_bytes)
@@ -398,9 +420,27 @@
(ConsumesFlagsReturnsReg (inst MInst) (result Reg)) (ConsumesFlagsReturnsReg (inst MInst) (result Reg))
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst) (ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
(inst2 MInst) (inst2 MInst)
(result ValueRegs))
(ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst)
(inst2 MInst)
(inst3 MInst)
(inst4 MInst)
(result ValueRegs)))) (result ValueRegs))))
;; Get the produced register out of a ProducesFlags.
;; NOTE(review): the only rule visible here matches the
;; `ProducesFlagsReturnsReg` variant; presumably the other `ProducesFlags`
;; variants are not expected as input -- confirm.
(decl produces_flags_get_reg (ProducesFlags) Reg)
(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg)
;; Modify a ProducesFlags to use it only for its side-effect, ignoring
;; its result.
(decl produces_flags_ignore (ProducesFlags) ProducesFlags)
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _))
(ProducesFlags.ProducesFlagsSideEffect inst))
(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _))
(ProducesFlags.ProducesFlagsSideEffect inst))
;; Helper for combining two flags-consumer instructions that return a ;; Helper for combining two flags-consumer instructions that return a
;; single Reg, giving a ConsumesFlags that returns both values in a ;; single Reg, giving a ConsumesFlags that returns both values in a
;; ValueRegs. ;; ValueRegs.
@@ -440,12 +480,28 @@
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
consumer_inst_2 consumer_inst_2
consumer_result)) consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_x Unit (emit producer_inst)) (let ((_x Unit (emit producer_inst))
;; Note that the order of emission here is swapped, as this seems (_y Unit (emit consumer_inst_1))
;; to generate better register allocation for now with fewer (_z Unit (emit consumer_inst_2)))
;; `mov` instructions. consumer_result))
(_y Unit (emit consumer_inst_2))
(_z Unit (emit consumer_inst_1))) (rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_inst_3
consumer_inst_4
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_x Unit (emit producer_inst))
(_y Unit (emit consumer_inst_1))
(_z Unit (emit consumer_inst_2))
(_w Unit (emit consumer_inst_3))
(_v Unit (emit consumer_inst_4)))
consumer_result)) consumer_result))
(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg) (decl with_flags_reg (ProducesFlags ConsumesFlags) Reg)

View File

@@ -1184,7 +1184,7 @@ block0(v0: i128, v1: i8):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 10) ; (instruction range: 0 .. 13)
; Inst 0: lsl x4, x0, x2 ; Inst 0: lsl x4, x0, x2
; Inst 1: lsl x3, x1, x2 ; Inst 1: lsl x3, x1, x2
; Inst 2: orn w1, wzr, w2 ; Inst 2: orn w1, wzr, w2
@@ -1192,9 +1192,12 @@ block0(v0: i128, v1: i8):
; Inst 4: lsr x0, x0, x1 ; Inst 4: lsr x0, x0, x1
; Inst 5: orr x0, x3, x0 ; Inst 5: orr x0, x3, x0
; Inst 6: ands xzr, x2, #64 ; Inst 6: ands xzr, x2, #64
; Inst 7: csel x1, x4, x0, ne ; Inst 7: csel x1, xzr, x4, ne
; Inst 8: csel x0, xzr, x4, ne ; Inst 8: csel x0, x4, x0, ne
; Inst 9: ret ; Inst 9: mov x2, x0
; Inst 10: mov x0, x1
; Inst 11: mov x1, x2
; Inst 12: ret
; }} ; }}
function %ishl_i128_i128(i128, i128) -> i128 { function %ishl_i128_i128(i128, i128) -> i128 {
@@ -1207,7 +1210,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 10) ; (instruction range: 0 .. 13)
; Inst 0: lsl x3, x0, x2 ; Inst 0: lsl x3, x0, x2
; Inst 1: lsl x1, x1, x2 ; Inst 1: lsl x1, x1, x2
; Inst 2: orn w4, wzr, w2 ; Inst 2: orn w4, wzr, w2
@@ -1215,9 +1218,12 @@ block0(v0: i128, v1: i128):
; Inst 4: lsr x0, x0, x4 ; Inst 4: lsr x0, x0, x4
; Inst 5: orr x0, x1, x0 ; Inst 5: orr x0, x1, x0
; Inst 6: ands xzr, x2, #64 ; Inst 6: ands xzr, x2, #64
; Inst 7: csel x1, x3, x0, ne ; Inst 7: csel x1, xzr, x3, ne
; Inst 8: csel x0, xzr, x3, ne ; Inst 8: csel x0, x3, x0, ne
; Inst 9: ret ; Inst 9: mov x2, x0
; Inst 10: mov x0, x1
; Inst 11: mov x1, x2
; Inst 12: ret
; }} ; }}
function %ushr_i128_i8(i128, i8) -> i128 { function %ushr_i128_i8(i128, i8) -> i128 {
@@ -1230,17 +1236,20 @@ block0(v0: i128, v1: i8):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 10) ; (instruction range: 0 .. 13)
; Inst 0: lsr x3, x0, x2 ; Inst 0: lsr x3, x0, x2
; Inst 1: lsr x0, x1, x2 ; Inst 1: lsr x0, x1, x2
; Inst 2: orn w4, wzr, w2 ; Inst 2: orn w4, wzr, w2
; Inst 3: lsl x1, x1, #1 ; Inst 3: lsl x1, x1, #1
; Inst 4: lsl x1, x1, x4 ; Inst 4: lsl x1, x1, x4
; Inst 5: orr x3, x3, x1 ; Inst 5: orr x1, x3, x1
; Inst 6: ands xzr, x2, #64 ; Inst 6: ands xzr, x2, #64
; Inst 7: csel x1, xzr, x0, ne ; Inst 7: csel x1, x0, x1, ne
; Inst 8: csel x0, x0, x3, ne ; Inst 8: csel x0, xzr, x0, ne
; Inst 9: ret ; Inst 9: mov x2, x0
; Inst 10: mov x0, x1
; Inst 11: mov x1, x2
; Inst 12: ret
; }} ; }}
function %ushr_i128_i128(i128, i128) -> i128 { function %ushr_i128_i128(i128, i128) -> i128 {
@@ -1253,17 +1262,20 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 10) ; (instruction range: 0 .. 13)
; Inst 0: lsr x3, x0, x2 ; Inst 0: lsr x3, x0, x2
; Inst 1: lsr x0, x1, x2 ; Inst 1: lsr x0, x1, x2
; Inst 2: orn w4, wzr, w2 ; Inst 2: orn w4, wzr, w2
; Inst 3: lsl x1, x1, #1 ; Inst 3: lsl x1, x1, #1
; Inst 4: lsl x1, x1, x4 ; Inst 4: lsl x1, x1, x4
; Inst 5: orr x3, x3, x1 ; Inst 5: orr x1, x3, x1
; Inst 6: ands xzr, x2, #64 ; Inst 6: ands xzr, x2, #64
; Inst 7: csel x1, xzr, x0, ne ; Inst 7: csel x1, x0, x1, ne
; Inst 8: csel x0, x0, x3, ne ; Inst 8: csel x0, xzr, x0, ne
; Inst 9: ret ; Inst 9: mov x2, x0
; Inst 10: mov x0, x1
; Inst 11: mov x1, x2
; Inst 12: ret
; }} ; }}
function %sshr_i128_i8(i128, i8) -> i128 { function %sshr_i128_i8(i128, i8) -> i128 {
@@ -1276,7 +1288,7 @@ block0(v0: i128, v1: i8):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 11) ; (instruction range: 0 .. 13)
; Inst 0: lsr x3, x0, x2 ; Inst 0: lsr x3, x0, x2
; Inst 1: asr x0, x1, x2 ; Inst 1: asr x0, x1, x2
; Inst 2: orn w4, wzr, w2 ; Inst 2: orn w4, wzr, w2
@@ -1285,9 +1297,11 @@ block0(v0: i128, v1: i8):
; Inst 5: asr x1, x1, #63 ; Inst 5: asr x1, x1, #63
; Inst 6: orr x3, x3, x4 ; Inst 6: orr x3, x3, x4
; Inst 7: ands xzr, x2, #64 ; Inst 7: ands xzr, x2, #64
; Inst 8: csel x1, x1, x0, ne ; Inst 8: csel x2, x0, x3, ne
; Inst 9: csel x0, x0, x3, ne ; Inst 9: csel x0, x1, x0, ne
; Inst 10: ret ; Inst 10: mov x1, x0
; Inst 11: mov x0, x2
; Inst 12: ret
; }} ; }}
function %sshr_i128_i128(i128, i128) -> i128 { function %sshr_i128_i128(i128, i128) -> i128 {
@@ -1300,7 +1314,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 11) ; (instruction range: 0 .. 13)
; Inst 0: lsr x3, x0, x2 ; Inst 0: lsr x3, x0, x2
; Inst 1: asr x0, x1, x2 ; Inst 1: asr x0, x1, x2
; Inst 2: orn w4, wzr, w2 ; Inst 2: orn w4, wzr, w2
@@ -1309,8 +1323,10 @@ block0(v0: i128, v1: i128):
; Inst 5: asr x1, x1, #63 ; Inst 5: asr x1, x1, #63
; Inst 6: orr x3, x3, x4 ; Inst 6: orr x3, x3, x4
; Inst 7: ands xzr, x2, #64 ; Inst 7: ands xzr, x2, #64
; Inst 8: csel x1, x1, x0, ne ; Inst 8: csel x2, x0, x3, ne
; Inst 9: csel x0, x0, x3, ne ; Inst 9: csel x0, x1, x0, ne
; Inst 10: ret ; Inst 10: mov x1, x0
; Inst 11: mov x0, x2
; Inst 12: ret
; }} ; }}

View File

@@ -16,19 +16,19 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 24) ; (instruction range: 0 .. 25)
; Inst 0: mov x4, x1 ; Inst 0: mov x4, x1
; Inst 1: orr x1, xzr, #128 ; Inst 1: orr x1, xzr, #128
; Inst 2: sub x1, x1, x2 ; Inst 2: sub x1, x1, x2
; Inst 3: lsr x3, x0, x2 ; Inst 3: lsr x5, x0, x2
; Inst 4: lsr x5, x4, x2 ; Inst 4: lsr x3, x4, x2
; Inst 5: orn w6, wzr, w2 ; Inst 5: orn w6, wzr, w2
; Inst 6: lsl x7, x4, #1 ; Inst 6: lsl x7, x4, #1
; Inst 7: lsl x6, x7, x6 ; Inst 7: lsl x6, x7, x6
; Inst 8: orr x6, x3, x6 ; Inst 8: orr x5, x5, x6
; Inst 9: ands xzr, x2, #64 ; Inst 9: ands xzr, x2, #64
; Inst 10: csel x3, xzr, x5, ne ; Inst 10: csel x2, x3, x5, ne
; Inst 11: csel x2, x5, x6, ne ; Inst 11: csel x3, xzr, x3, ne
; Inst 12: lsl x5, x0, x1 ; Inst 12: lsl x5, x0, x1
; Inst 13: lsl x4, x4, x1 ; Inst 13: lsl x4, x4, x1
; Inst 14: orn w6, wzr, w1 ; Inst 14: orn w6, wzr, w1
@@ -36,11 +36,12 @@ block0(v0: i128, v1: i128):
; Inst 16: lsr x0, x0, x6 ; Inst 16: lsr x0, x0, x6
; Inst 17: orr x0, x4, x0 ; Inst 17: orr x0, x4, x0
; Inst 18: ands xzr, x1, #64 ; Inst 18: ands xzr, x1, #64
; Inst 19: csel x1, x5, x0, ne ; Inst 19: csel x1, xzr, x5, ne
; Inst 20: csel x0, xzr, x5, ne ; Inst 20: csel x0, x5, x0, ne
; Inst 21: orr x1, x3, x1 ; Inst 21: orr x3, x3, x0
; Inst 22: orr x0, x2, x0 ; Inst 22: orr x0, x2, x1
; Inst 23: ret ; Inst 23: mov x1, x3
; Inst 24: ret
; }} ; }}
function %f0(i64, i64) -> i64 { function %f0(i64, i64) -> i64 {
@@ -125,7 +126,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 27) ; (instruction range: 0 .. 24)
; Inst 0: mov x4, x0 ; Inst 0: mov x4, x0
; Inst 1: orr x0, xzr, #128 ; Inst 1: orr x0, xzr, #128
; Inst 2: sub x0, x0, x2 ; Inst 2: sub x0, x0, x2
@@ -136,8 +137,8 @@ block0(v0: i128, v1: i128):
; Inst 7: lsr x6, x7, x6 ; Inst 7: lsr x6, x7, x6
; Inst 8: orr x5, x5, x6 ; Inst 8: orr x5, x5, x6
; Inst 9: ands xzr, x2, #64 ; Inst 9: ands xzr, x2, #64
; Inst 10: csel x2, x3, x5, ne ; Inst 10: csel x2, xzr, x3, ne
; Inst 11: csel x3, xzr, x3, ne ; Inst 11: csel x3, x3, x5, ne
; Inst 12: lsr x5, x4, x0 ; Inst 12: lsr x5, x4, x0
; Inst 13: lsr x4, x1, x0 ; Inst 13: lsr x4, x1, x0
; Inst 14: orn w6, wzr, w0 ; Inst 14: orn w6, wzr, w0
@@ -145,14 +146,11 @@ block0(v0: i128, v1: i128):
; Inst 16: lsl x1, x1, x6 ; Inst 16: lsl x1, x1, x6
; Inst 17: orr x1, x5, x1 ; Inst 17: orr x1, x5, x1
; Inst 18: ands xzr, x0, #64 ; Inst 18: ands xzr, x0, #64
; Inst 19: csel x0, xzr, x4, ne ; Inst 19: csel x0, x4, x1, ne
; Inst 20: csel x1, x4, x1, ne ; Inst 20: csel x1, xzr, x4, ne
; Inst 21: orr x1, x3, x1 ; Inst 21: orr x0, x2, x0
; Inst 22: orr x0, x2, x0 ; Inst 22: orr x1, x3, x1
; Inst 23: mov x2, x0 ; Inst 23: ret
; Inst 24: mov x0, x1
; Inst 25: mov x1, x2
; Inst 26: ret
; }} ; }}
function %f4(i64, i64) -> i64 { function %f4(i64, i64) -> i64 {

View File

@@ -43,7 +43,7 @@ block0(v0: f64, v1: i64):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 16) ; (instruction range: 0 .. 17)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movsd 0(%rdi), %xmm1 ; Inst 2: movsd 0(%rdi), %xmm1
@@ -54,10 +54,12 @@ block0(v0: f64, v1: i64):
; Inst 7: andq $1, %rsi ; Inst 7: andq $1, %rsi
; Inst 8: ucomisd %xmm0, %xmm1 ; Inst 8: ucomisd %xmm0, %xmm1
; Inst 9: movaps %xmm0, %xmm1 ; Inst 9: movaps %xmm0, %xmm1
; Inst 10: jz $check; movsd %xmm0, %xmm1; $check: jnp $next; movsd %xmm0, %xmm1; $next ; Inst 10: jz $next; movsd %xmm0, %xmm1; $next:
; Inst 11: movq %rsi, %rax ; Inst 11: jnp $next; movsd %xmm0, %xmm1; $next:
; Inst 12: movaps %xmm1, %xmm0 ; Inst 12: movq %rsi, %rax
; Inst 13: movq %rbp, %rsp ; Inst 13: movaps %xmm1, %xmm0
; Inst 14: popq %rbp ; Inst 14: movq %rbp, %rsp
; Inst 15: ret ; Inst 15: popq %rbp
; Inst 16: ret
; }} ; }}

View File

@@ -600,57 +600,55 @@ block0(v0: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 50) ; (instruction range: 0 .. 48)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rsi, %rdx ; Inst 2: movq %rdi, %rax
; Inst 3: movq %rdi, %rsi ; Inst 3: movq %rax, %rcx
; Inst 4: shrq $1, %rsi ; Inst 4: shrq $1, %rcx
; Inst 5: movabsq $8608480567731124087, %rcx ; Inst 5: movabsq $8608480567731124087, %rdi
; Inst 6: andq %rcx, %rsi ; Inst 6: andq %rdi, %rcx
; Inst 7: movq %rdi, %rax ; Inst 7: subq %rcx, %rax
; Inst 8: subq %rsi, %rax ; Inst 8: shrq $1, %rcx
; Inst 9: shrq $1, %rsi ; Inst 9: andq %rdi, %rcx
; Inst 10: andq %rcx, %rsi ; Inst 10: subq %rcx, %rax
; Inst 11: subq %rsi, %rax ; Inst 11: shrq $1, %rcx
; Inst 12: shrq $1, %rsi ; Inst 12: andq %rdi, %rcx
; Inst 13: andq %rcx, %rsi ; Inst 13: subq %rcx, %rax
; Inst 14: subq %rsi, %rax ; Inst 14: movq %rax, %rdi
; Inst 15: movq %rax, %rsi ; Inst 15: shrq $4, %rdi
; Inst 16: shrq $4, %rsi ; Inst 16: addq %rax, %rdi
; Inst 17: addq %rax, %rsi ; Inst 17: movabsq $1085102592571150095, %rax
; Inst 18: movabsq $1085102592571150095, %rdi ; Inst 18: andq %rax, %rdi
; Inst 19: andq %rdi, %rsi ; Inst 19: movabsq $72340172838076673, %rax
; Inst 20: movabsq $72340172838076673, %rdi ; Inst 20: imulq %rax, %rdi
; Inst 21: imulq %rdi, %rsi ; Inst 21: shrq $56, %rdi
; Inst 22: shrq $56, %rsi ; Inst 22: movq %rsi, %rcx
; Inst 23: movq %rdx, %rax ; Inst 23: shrq $1, %rcx
; Inst 24: shrq $1, %rax ; Inst 24: movabsq $8608480567731124087, %rax
; Inst 25: movabsq $8608480567731124087, %rcx ; Inst 25: andq %rax, %rcx
; Inst 26: andq %rcx, %rax ; Inst 26: subq %rcx, %rsi
; Inst 27: movq %rdx, %rdi ; Inst 27: shrq $1, %rcx
; Inst 28: subq %rax, %rdi ; Inst 28: andq %rax, %rcx
; Inst 29: shrq $1, %rax ; Inst 29: subq %rcx, %rsi
; Inst 30: andq %rcx, %rax ; Inst 30: shrq $1, %rcx
; Inst 31: subq %rax, %rdi ; Inst 31: andq %rax, %rcx
; Inst 32: shrq $1, %rax ; Inst 32: subq %rcx, %rsi
; Inst 33: andq %rcx, %rax ; Inst 33: movq %rsi, %rax
; Inst 34: subq %rax, %rdi ; Inst 34: shrq $4, %rax
; Inst 35: movq %rdi, %rax ; Inst 35: addq %rsi, %rax
; Inst 36: shrq $4, %rax ; Inst 36: movabsq $1085102592571150095, %rsi
; Inst 37: addq %rdi, %rax ; Inst 37: andq %rsi, %rax
; Inst 38: movabsq $1085102592571150095, %rdi ; Inst 38: movabsq $72340172838076673, %rsi
; Inst 39: andq %rdi, %rax ; Inst 39: imulq %rsi, %rax
; Inst 40: movabsq $72340172838076673, %rdi ; Inst 40: shrq $56, %rax
; Inst 41: imulq %rdi, %rax ; Inst 41: addq %rax, %rdi
; Inst 42: shrq $56, %rax ; Inst 42: xorq %rsi, %rsi
; Inst 43: addq %rax, %rsi ; Inst 43: movq %rdi, %rax
; Inst 44: xorq %rdi, %rdi ; Inst 44: movq %rsi, %rdx
; Inst 45: movq %rsi, %rax ; Inst 45: movq %rbp, %rsp
; Inst 46: movq %rdi, %rdx ; Inst 46: popq %rbp
; Inst 47: movq %rbp, %rsp ; Inst 47: ret
; Inst 48: popq %rbp
; Inst 49: ret
; }} ; }}
function %f20(i128) -> i128 { function %f20(i128) -> i128 {
@@ -663,108 +661,97 @@ block0(v0: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 101) ; (instruction range: 0 .. 90)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rcx ; Inst 2: movq %rsi, %rcx
; Inst 3: movq %rcx, %rdi ; Inst 3: movabsq $6148914691236517205, %rsi
; Inst 4: movabsq $6148914691236517205, %rax ; Inst 4: movq %rcx, %rax
; Inst 5: shrq $1, %rdi ; Inst 5: andq %rsi, %rax
; Inst 6: andq %rax, %rdi ; Inst 6: shrq $1, %rcx
; Inst 7: andq %rcx, %rax ; Inst 7: andq %rsi, %rcx
; Inst 8: shlq $1, %rax ; Inst 8: shlq $1, %rax
; Inst 9: movq %rax, %rcx ; Inst 9: orq %rcx, %rax
; Inst 10: orq %rdi, %rcx ; Inst 10: movabsq $3689348814741910323, %rsi
; Inst 11: movq %rcx, %rdi ; Inst 11: movq %rax, %rcx
; Inst 12: movabsq $3689348814741910323, %rax ; Inst 12: andq %rsi, %rcx
; Inst 13: shrq $2, %rdi ; Inst 13: shrq $2, %rax
; Inst 14: andq %rax, %rdi ; Inst 14: andq %rsi, %rax
; Inst 15: andq %rcx, %rax ; Inst 15: shlq $2, %rcx
; Inst 16: shlq $2, %rax ; Inst 16: orq %rax, %rcx
; Inst 17: movq %rax, %rcx ; Inst 17: movabsq $1085102592571150095, %rsi
; Inst 18: orq %rdi, %rcx ; Inst 18: movq %rcx, %rax
; Inst 19: movq %rcx, %rdi ; Inst 19: andq %rsi, %rax
; Inst 20: movabsq $1085102592571150095, %rax ; Inst 20: shrq $4, %rcx
; Inst 21: shrq $4, %rdi ; Inst 21: andq %rsi, %rcx
; Inst 22: andq %rax, %rdi ; Inst 22: shlq $4, %rax
; Inst 23: andq %rcx, %rax ; Inst 23: orq %rcx, %rax
; Inst 24: shlq $4, %rax ; Inst 24: movabsq $71777214294589695, %rsi
; Inst 25: movq %rax, %rcx ; Inst 25: movq %rax, %rcx
; Inst 26: orq %rdi, %rcx ; Inst 26: andq %rsi, %rcx
; Inst 27: movq %rcx, %rdi ; Inst 27: shrq $8, %rax
; Inst 28: movabsq $71777214294589695, %rax ; Inst 28: andq %rsi, %rax
; Inst 29: shrq $8, %rdi ; Inst 29: shlq $8, %rcx
; Inst 30: andq %rax, %rdi ; Inst 30: orq %rax, %rcx
; Inst 31: andq %rcx, %rax ; Inst 31: movabsq $281470681808895, %rsi
; Inst 32: shlq $8, %rax ; Inst 32: movq %rcx, %rax
; Inst 33: movq %rax, %rcx ; Inst 33: andq %rsi, %rax
; Inst 34: orq %rdi, %rcx ; Inst 34: shrq $16, %rcx
; Inst 35: movq %rcx, %rdi ; Inst 35: andq %rsi, %rcx
; Inst 36: movabsq $281470681808895, %rax ; Inst 36: shlq $16, %rax
; Inst 37: shrq $16, %rdi ; Inst 37: orq %rcx, %rax
; Inst 38: andq %rax, %rdi ; Inst 38: movabsq $4294967295, %rcx
; Inst 39: andq %rcx, %rax ; Inst 39: movq %rax, %rsi
; Inst 40: shlq $16, %rax ; Inst 40: andq %rcx, %rsi
; Inst 41: orq %rdi, %rax ; Inst 41: shrq $32, %rax
; Inst 42: movq %rax, %rcx ; Inst 42: shlq $32, %rsi
; Inst 43: movl $-1, %edi ; Inst 43: orq %rax, %rsi
; Inst 44: shrq $32, %rcx ; Inst 44: movabsq $6148914691236517205, %rax
; Inst 45: andq %rdi, %rcx ; Inst 45: movq %rdi, %rcx
; Inst 46: andq %rax, %rdi ; Inst 46: andq %rax, %rcx
; Inst 47: shlq $32, %rdi ; Inst 47: shrq $1, %rdi
; Inst 48: orq %rcx, %rdi ; Inst 48: andq %rax, %rdi
; Inst 49: movq %rsi, %rcx ; Inst 49: shlq $1, %rcx
; Inst 50: movq %rcx, %rsi ; Inst 50: orq %rdi, %rcx
; Inst 51: movabsq $6148914691236517205, %rax ; Inst 51: movabsq $3689348814741910323, %rdi
; Inst 52: shrq $1, %rsi ; Inst 52: movq %rcx, %rax
; Inst 53: andq %rax, %rsi ; Inst 53: andq %rdi, %rax
; Inst 54: andq %rcx, %rax ; Inst 54: shrq $2, %rcx
; Inst 55: shlq $1, %rax ; Inst 55: andq %rdi, %rcx
; Inst 56: movq %rax, %rcx ; Inst 56: shlq $2, %rax
; Inst 57: orq %rsi, %rcx ; Inst 57: orq %rcx, %rax
; Inst 58: movq %rcx, %rsi ; Inst 58: movabsq $1085102592571150095, %rdi
; Inst 59: movabsq $3689348814741910323, %rax ; Inst 59: movq %rax, %rcx
; Inst 60: shrq $2, %rsi ; Inst 60: andq %rdi, %rcx
; Inst 61: andq %rax, %rsi ; Inst 61: shrq $4, %rax
; Inst 62: andq %rcx, %rax ; Inst 62: andq %rdi, %rax
; Inst 63: shlq $2, %rax ; Inst 63: shlq $4, %rcx
; Inst 64: movq %rax, %rcx ; Inst 64: orq %rax, %rcx
; Inst 65: orq %rsi, %rcx ; Inst 65: movabsq $71777214294589695, %rdi
; Inst 66: movq %rcx, %rsi ; Inst 66: movq %rcx, %rax
; Inst 67: movabsq $1085102592571150095, %rax ; Inst 67: andq %rdi, %rax
; Inst 68: shrq $4, %rsi ; Inst 68: shrq $8, %rcx
; Inst 69: andq %rax, %rsi ; Inst 69: andq %rdi, %rcx
; Inst 70: andq %rcx, %rax ; Inst 70: shlq $8, %rax
; Inst 71: shlq $4, %rax ; Inst 71: orq %rcx, %rax
; Inst 72: movq %rax, %rcx ; Inst 72: movabsq $281470681808895, %rdi
; Inst 73: orq %rsi, %rcx ; Inst 73: movq %rax, %rcx
; Inst 74: movq %rcx, %rsi ; Inst 74: andq %rdi, %rcx
; Inst 75: movabsq $71777214294589695, %rax ; Inst 75: shrq $16, %rax
; Inst 76: shrq $8, %rsi ; Inst 76: andq %rdi, %rax
; Inst 77: andq %rax, %rsi ; Inst 77: shlq $16, %rcx
; Inst 78: andq %rcx, %rax ; Inst 78: orq %rax, %rcx
; Inst 79: shlq $8, %rax ; Inst 79: movabsq $4294967295, %rax
; Inst 80: movq %rax, %rcx ; Inst 80: movq %rcx, %rdi
; Inst 81: orq %rsi, %rcx ; Inst 81: andq %rax, %rdi
; Inst 82: movq %rcx, %rsi ; Inst 82: shrq $32, %rcx
; Inst 83: movabsq $281470681808895, %rax ; Inst 83: shlq $32, %rdi
; Inst 84: shrq $16, %rsi ; Inst 84: orq %rcx, %rdi
; Inst 85: andq %rax, %rsi ; Inst 85: movq %rsi, %rax
; Inst 86: andq %rcx, %rax ; Inst 86: movq %rdi, %rdx
; Inst 87: shlq $16, %rax ; Inst 87: movq %rbp, %rsp
; Inst 88: orq %rsi, %rax ; Inst 88: popq %rbp
; Inst 89: movq %rax, %rsi ; Inst 89: ret
; Inst 90: movl $-1, %ecx
; Inst 91: shrq $32, %rsi
; Inst 92: andq %rcx, %rsi
; Inst 93: andq %rax, %rcx
; Inst 94: shlq $32, %rcx
; Inst 95: orq %rsi, %rcx
; Inst 96: movq %rcx, %rax
; Inst 97: movq %rdi, %rdx
; Inst 98: movq %rbp, %rsp
; Inst 99: popq %rbp
; Inst 100: ret
; }} ; }}
function %f21(i128, i64) { function %f21(i128, i64) {
@@ -1020,11 +1007,11 @@ block0(v0: i128):
; Inst 4: cmovzq %rcx, %rax ; Inst 4: cmovzq %rcx, %rax
; Inst 5: movl $63, %esi ; Inst 5: movl $63, %esi
; Inst 6: subq %rax, %rsi ; Inst 6: subq %rax, %rsi
; Inst 7: movabsq $-1, %rcx ; Inst 7: movabsq $-1, %rax
; Inst 8: bsrq %rdi, %rax ; Inst 8: bsrq %rdi, %rcx
; Inst 9: cmovzq %rcx, %rax ; Inst 9: cmovzq %rax, %rcx
; Inst 10: movl $63, %edi ; Inst 10: movl $63, %edi
; Inst 11: subq %rax, %rdi ; Inst 11: subq %rcx, %rdi
; Inst 12: addq $64, %rdi ; Inst 12: addq $64, %rdi
; Inst 13: cmpq $64, %rsi ; Inst 13: cmpq $64, %rsi
; Inst 14: cmovnzq %rsi, %rdi ; Inst 14: cmovnzq %rsi, %rdi
@@ -1098,7 +1085,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 25) ; (instruction range: 0 .. 24)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rax ; Inst 2: movq %rdi, %rax
@@ -1116,14 +1103,13 @@ block0(v0: i128, v1: i128):
; Inst 14: cmovzq %rcx, %rax ; Inst 14: cmovzq %rcx, %rax
; Inst 15: orq %rdi, %rax ; Inst 15: orq %rdi, %rax
; Inst 16: testq $64, %rdx ; Inst 16: testq $64, %rdx
; Inst 17: movq %rsi, %rdi ; Inst 17: cmovzq %rsi, %rcx
; Inst 18: cmovzq %rax, %rdi ; Inst 18: cmovzq %rax, %rsi
; Inst 19: cmovzq %rsi, %rcx ; Inst 19: movq %rcx, %rax
; Inst 20: movq %rcx, %rax ; Inst 20: movq %rsi, %rdx
; Inst 21: movq %rdi, %rdx ; Inst 21: movq %rbp, %rsp
; Inst 22: movq %rbp, %rsp ; Inst 22: popq %rbp
; Inst 23: popq %rbp ; Inst 23: ret
; Inst 24: ret
; }} ; }}
function %f31(i128, i128) -> i128 { function %f31(i128, i128) -> i128 {
@@ -1136,7 +1122,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 24) ; (instruction range: 0 .. 25)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rsi, %rax ; Inst 2: movq %rsi, %rax
@@ -1152,15 +1138,16 @@ block0(v0: i128, v1: i128):
; Inst 12: testq $127, %rdx ; Inst 12: testq $127, %rdx
; Inst 13: cmovzq %rcx, %rax ; Inst 13: cmovzq %rcx, %rax
; Inst 14: orq %rdi, %rax ; Inst 14: orq %rdi, %rax
; Inst 15: xorq %rdi, %rdi ; Inst 15: xorq %rcx, %rcx
; Inst 16: testq $64, %rdx ; Inst 16: testq $64, %rdx
; Inst 17: cmovzq %rsi, %rdi ; Inst 17: movq %rsi, %rdi
; Inst 18: cmovzq %rax, %rsi ; Inst 18: cmovzq %rax, %rdi
; Inst 19: movq %rsi, %rax ; Inst 19: cmovzq %rsi, %rcx
; Inst 20: movq %rdi, %rdx ; Inst 20: movq %rdi, %rax
; Inst 21: movq %rbp, %rsp ; Inst 21: movq %rcx, %rdx
; Inst 22: popq %rbp ; Inst 22: movq %rbp, %rsp
; Inst 23: ret ; Inst 23: popq %rbp
; Inst 24: ret
; }} ; }}
function %f32(i128, i128) -> i128 { function %f32(i128, i128) -> i128 {
@@ -1173,7 +1160,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 25) ; (instruction range: 0 .. 26)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rax ; Inst 2: movq %rdi, %rax
@@ -1192,13 +1179,14 @@ block0(v0: i128, v1: i128):
; Inst 15: orq %r8, %rax ; Inst 15: orq %r8, %rax
; Inst 16: sarq $63, %rsi ; Inst 16: sarq $63, %rsi
; Inst 17: testq $64, %rdx ; Inst 17: testq $64, %rdx
; Inst 18: cmovzq %rdi, %rsi ; Inst 18: movq %rdi, %rcx
; Inst 19: cmovzq %rax, %rdi ; Inst 19: cmovzq %rax, %rcx
; Inst 20: movq %rdi, %rax ; Inst 20: cmovzq %rdi, %rsi
; Inst 21: movq %rsi, %rdx ; Inst 21: movq %rcx, %rax
; Inst 22: movq %rbp, %rsp ; Inst 22: movq %rsi, %rdx
; Inst 23: popq %rbp ; Inst 23: movq %rbp, %rsp
; Inst 24: ret ; Inst 24: popq %rbp
; Inst 25: ret
; }} ; }}
function %f33(i128, i128) -> i128 { function %f33(i128, i128) -> i128 {
@@ -1211,27 +1199,27 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 46) ; (instruction range: 0 .. 48)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %r9 ; Inst 2: movq %rdi, %rax
; Inst 3: movq %rdx, %rcx ; Inst 3: movq %rdx, %rcx
; Inst 4: shlq %cl, %r9 ; Inst 4: shlq %cl, %rax
; Inst 5: movq %rsi, %rax ; Inst 5: movq %rsi, %r8
; Inst 6: movq %rdx, %rcx ; Inst 6: movq %rdx, %rcx
; Inst 7: shlq %cl, %rax ; Inst 7: shlq %cl, %r8
; Inst 8: movl $64, %ecx ; Inst 8: movl $64, %ecx
; Inst 9: subq %rdx, %rcx ; Inst 9: subq %rdx, %rcx
; Inst 10: movq %rdi, %r10 ; Inst 10: movq %rdi, %r9
; Inst 11: shrq %cl, %r10 ; Inst 11: shrq %cl, %r9
; Inst 12: xorq %r8, %r8 ; Inst 12: xorq %rcx, %rcx
; Inst 13: testq $127, %rdx ; Inst 13: testq $127, %rdx
; Inst 14: cmovzq %r8, %r10 ; Inst 14: cmovzq %rcx, %r9
; Inst 15: orq %rax, %r10 ; Inst 15: orq %r8, %r9
; Inst 16: testq $64, %rdx ; Inst 16: testq $64, %rdx
; Inst 17: movq %r9, %rax ; Inst 17: movq %rcx, %r8
; Inst 18: cmovzq %r10, %rax ; Inst 18: cmovzq %rax, %r8
; Inst 19: cmovzq %r9, %r8 ; Inst 19: cmovzq %r9, %rax
; Inst 20: movl $128, %r9d ; Inst 20: movl $128, %r9d
; Inst 21: subq %rdx, %r9 ; Inst 21: subq %rdx, %r9
; Inst 22: movq %rdi, %rdx ; Inst 22: movq %rdi, %rdx
@@ -1247,17 +1235,19 @@ block0(v0: i128, v1: i128):
; Inst 32: testq $127, %r9 ; Inst 32: testq $127, %r9
; Inst 33: cmovzq %rcx, %rsi ; Inst 33: cmovzq %rcx, %rsi
; Inst 34: orq %rdx, %rsi ; Inst 34: orq %rdx, %rsi
; Inst 35: xorq %rcx, %rcx ; Inst 35: xorq %rdx, %rdx
; Inst 36: testq $64, %r9 ; Inst 36: testq $64, %r9
; Inst 37: cmovzq %rdi, %rcx ; Inst 37: movq %rdi, %rcx
; Inst 38: cmovzq %rsi, %rdi ; Inst 38: cmovzq %rsi, %rcx
; Inst 39: orq %rdi, %r8 ; Inst 39: movq %rdx, %rsi
; Inst 40: orq %rcx, %rax ; Inst 40: cmovzq %rdi, %rsi
; Inst 41: movq %rax, %rdx ; Inst 41: orq %rcx, %r8
; Inst 42: movq %r8, %rax ; Inst 42: orq %rsi, %rax
; Inst 43: movq %rbp, %rsp ; Inst 43: movq %rax, %rdx
; Inst 44: popq %rbp ; Inst 44: movq %r8, %rax
; Inst 45: ret ; Inst 45: movq %rbp, %rsp
; Inst 46: popq %rbp
; Inst 47: ret
; }} ; }}
function %f34(i128, i128) -> i128 { function %f34(i128, i128) -> i128 {
@@ -1270,52 +1260,51 @@ block0(v0: i128, v1: i128):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 46) ; (instruction range: 0 .. 45)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rax ; Inst 2: movq %rdi, %rax
; Inst 3: movq %rdx, %rcx ; Inst 3: movq %rdx, %rcx
; Inst 4: shrq %cl, %rax ; Inst 4: shrq %cl, %rax
; Inst 5: movq %rsi, %r8 ; Inst 5: movq %rsi, %r9
; Inst 6: movq %rdx, %rcx ; Inst 6: movq %rdx, %rcx
; Inst 7: shrq %cl, %r8 ; Inst 7: shrq %cl, %r9
; Inst 8: movl $64, %ecx ; Inst 8: movl $64, %ecx
; Inst 9: subq %rdx, %rcx ; Inst 9: subq %rdx, %rcx
; Inst 10: movq %rsi, %r9 ; Inst 10: movq %rsi, %r8
; Inst 11: shlq %cl, %r9 ; Inst 11: shlq %cl, %r8
; Inst 12: xorq %rcx, %rcx ; Inst 12: xorq %rcx, %rcx
; Inst 13: testq $127, %rdx ; Inst 13: testq $127, %rdx
; Inst 14: cmovzq %rcx, %r9 ; Inst 14: cmovzq %rcx, %r8
; Inst 15: movq %r9, %rcx ; Inst 15: orq %rax, %r8
; Inst 16: orq %rax, %rcx ; Inst 16: xorq %rcx, %rcx
; Inst 17: xorq %rax, %rax ; Inst 17: testq $64, %rdx
; Inst 18: testq $64, %rdx ; Inst 18: movq %r9, %rax
; Inst 19: cmovzq %r8, %rax ; Inst 19: cmovzq %r8, %rax
; Inst 20: cmovzq %rcx, %r8 ; Inst 20: movq %rcx, %r8
; Inst 21: movl $128, %r9d ; Inst 21: cmovzq %r9, %r8
; Inst 22: subq %rdx, %r9 ; Inst 22: movl $128, %r9d
; Inst 23: movq %rdi, %rdx ; Inst 23: subq %rdx, %r9
; Inst 24: movq %r9, %rcx ; Inst 24: movq %rdi, %rdx
; Inst 25: shlq %cl, %rdx ; Inst 25: movq %r9, %rcx
; Inst 26: movq %r9, %rcx ; Inst 26: shlq %cl, %rdx
; Inst 27: shlq %cl, %rsi ; Inst 27: movq %r9, %rcx
; Inst 28: movl $64, %ecx ; Inst 28: shlq %cl, %rsi
; Inst 29: subq %r9, %rcx ; Inst 29: movl $64, %ecx
; Inst 30: shrq %cl, %rdi ; Inst 30: subq %r9, %rcx
; Inst 31: xorq %rcx, %rcx ; Inst 31: shrq %cl, %rdi
; Inst 32: testq $127, %r9 ; Inst 32: xorq %rcx, %rcx
; Inst 33: cmovzq %rcx, %rdi ; Inst 33: testq $127, %r9
; Inst 34: orq %rsi, %rdi ; Inst 34: cmovzq %rcx, %rdi
; Inst 35: testq $64, %r9 ; Inst 35: orq %rsi, %rdi
; Inst 36: movq %rdx, %rsi ; Inst 36: testq $64, %r9
; Inst 37: cmovzq %rdi, %rsi ; Inst 37: cmovzq %rdx, %rcx
; Inst 38: cmovzq %rdx, %rcx ; Inst 38: cmovzq %rdi, %rdx
; Inst 39: orq %rcx, %r8 ; Inst 39: orq %rcx, %rax
; Inst 40: orq %rsi, %rax ; Inst 40: orq %rdx, %r8
; Inst 41: movq %rax, %rdx ; Inst 41: movq %r8, %rdx
; Inst 42: movq %r8, %rax ; Inst 42: movq %rbp, %rsp
; Inst 43: movq %rbp, %rsp ; Inst 43: popq %rbp
; Inst 44: popq %rbp ; Inst 44: ret
; Inst 45: ret
; }} ; }}

View File

@@ -14,17 +14,17 @@ block0(v0: i64):
; (instruction range: 0 .. 25) ; (instruction range: 0 .. 25)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rsi ; Inst 2: movq %rdi, %rax
; Inst 3: shrq $1, %rsi ; Inst 3: shrq $1, %rax
; Inst 4: movabsq $8608480567731124087, %rax ; Inst 4: movabsq $8608480567731124087, %rsi
; Inst 5: andq %rax, %rsi ; Inst 5: andq %rsi, %rax
; Inst 6: subq %rsi, %rdi ; Inst 6: subq %rax, %rdi
; Inst 7: shrq $1, %rsi ; Inst 7: shrq $1, %rax
; Inst 8: andq %rax, %rsi ; Inst 8: andq %rsi, %rax
; Inst 9: subq %rsi, %rdi ; Inst 9: subq %rax, %rdi
; Inst 10: shrq $1, %rsi ; Inst 10: shrq $1, %rax
; Inst 11: andq %rax, %rsi ; Inst 11: andq %rsi, %rax
; Inst 12: subq %rsi, %rdi ; Inst 12: subq %rax, %rdi
; Inst 13: movq %rdi, %rsi ; Inst 13: movq %rdi, %rsi
; Inst 14: shrq $4, %rsi ; Inst 14: shrq $4, %rsi
; Inst 15: addq %rdi, %rsi ; Inst 15: addq %rdi, %rsi
@@ -54,17 +54,17 @@ block0(v0: i64):
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq 0(%rdi), %rdi ; Inst 2: movq 0(%rdi), %rdi
; Inst 3: movq %rdi, %rsi ; Inst 3: movq %rdi, %rax
; Inst 4: shrq $1, %rsi ; Inst 4: shrq $1, %rax
; Inst 5: movabsq $8608480567731124087, %rax ; Inst 5: movabsq $8608480567731124087, %rsi
; Inst 6: andq %rax, %rsi ; Inst 6: andq %rsi, %rax
; Inst 7: subq %rsi, %rdi ; Inst 7: subq %rax, %rdi
; Inst 8: shrq $1, %rsi ; Inst 8: shrq $1, %rax
; Inst 9: andq %rax, %rsi ; Inst 9: andq %rsi, %rax
; Inst 10: subq %rsi, %rdi ; Inst 10: subq %rax, %rdi
; Inst 11: shrq $1, %rsi ; Inst 11: shrq $1, %rax
; Inst 12: andq %rax, %rsi ; Inst 12: andq %rsi, %rax
; Inst 13: subq %rsi, %rdi ; Inst 13: subq %rax, %rdi
; Inst 14: movq %rdi, %rsi ; Inst 14: movq %rdi, %rsi
; Inst 15: shrq $4, %rsi ; Inst 15: shrq $4, %rsi
; Inst 16: addq %rdi, %rsi ; Inst 16: addq %rdi, %rsi
@@ -89,29 +89,30 @@ block0(v0: i32):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 22) ; (instruction range: 0 .. 23)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rsi ; Inst 2: movq %rdi, %rax
; Inst 3: shrl $1, %esi ; Inst 3: shrl $1, %eax
; Inst 4: andl $2004318071, %esi ; Inst 4: movl $2004318071, %esi
; Inst 5: subl %esi, %edi ; Inst 5: andl %esi, %eax
; Inst 6: shrl $1, %esi ; Inst 6: subl %eax, %edi
; Inst 7: andl $2004318071, %esi ; Inst 7: shrl $1, %eax
; Inst 8: subl %esi, %edi ; Inst 8: andl %esi, %eax
; Inst 9: shrl $1, %esi ; Inst 9: subl %eax, %edi
; Inst 10: andl $2004318071, %esi ; Inst 10: shrl $1, %eax
; Inst 11: subl %esi, %edi ; Inst 11: andl %esi, %eax
; Inst 12: movq %rdi, %rsi ; Inst 12: subl %eax, %edi
; Inst 13: shrl $4, %esi ; Inst 13: movq %rdi, %rsi
; Inst 14: addl %edi, %esi ; Inst 14: shrl $4, %esi
; Inst 15: andl $252645135, %esi ; Inst 15: addl %edi, %esi
; Inst 16: imull $16843009, %esi ; Inst 16: andl $252645135, %esi
; Inst 17: shrl $24, %esi ; Inst 17: imull $16843009, %esi
; Inst 18: movq %rsi, %rax ; Inst 18: shrl $24, %esi
; Inst 19: movq %rbp, %rsp ; Inst 19: movq %rsi, %rax
; Inst 20: popq %rbp ; Inst 20: movq %rbp, %rsp
; Inst 21: ret ; Inst 21: popq %rbp
; Inst 22: ret
; }} ; }}
function %popcnt32load(i64) -> i32 { function %popcnt32load(i64) -> i32 {
@@ -125,29 +126,30 @@ block0(v0: i64):
; Entry block: 0 ; Entry block: 0
; Block 0: ; Block 0:
; (original IR block: block0) ; (original IR block: block0)
; (instruction range: 0 .. 23) ; (instruction range: 0 .. 24)
; Inst 0: pushq %rbp ; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp ; Inst 1: movq %rsp, %rbp
; Inst 2: movl 0(%rdi), %edi ; Inst 2: movl 0(%rdi), %edi
; Inst 3: movq %rdi, %rsi ; Inst 3: movq %rdi, %rax
; Inst 4: shrl $1, %esi ; Inst 4: shrl $1, %eax
; Inst 5: andl $2004318071, %esi ; Inst 5: movl $2004318071, %esi
; Inst 6: subl %esi, %edi ; Inst 6: andl %esi, %eax
; Inst 7: shrl $1, %esi ; Inst 7: subl %eax, %edi
; Inst 8: andl $2004318071, %esi ; Inst 8: shrl $1, %eax
; Inst 9: subl %esi, %edi ; Inst 9: andl %esi, %eax
; Inst 10: shrl $1, %esi ; Inst 10: subl %eax, %edi
; Inst 11: andl $2004318071, %esi ; Inst 11: shrl $1, %eax
; Inst 12: subl %esi, %edi ; Inst 12: andl %esi, %eax
; Inst 13: movq %rdi, %rsi ; Inst 13: subl %eax, %edi
; Inst 14: shrl $4, %esi ; Inst 14: movq %rdi, %rsi
; Inst 15: addl %edi, %esi ; Inst 15: shrl $4, %esi
; Inst 16: andl $252645135, %esi ; Inst 16: addl %edi, %esi
; Inst 17: imull $16843009, %esi ; Inst 17: andl $252645135, %esi
; Inst 18: shrl $24, %esi ; Inst 18: imull $16843009, %esi
; Inst 19: movq %rsi, %rax ; Inst 19: shrl $24, %esi
; Inst 20: movq %rbp, %rsp ; Inst 20: movq %rsi, %rax
; Inst 21: popq %rbp ; Inst 21: movq %rbp, %rsp
; Inst 22: ret ; Inst 22: popq %rbp
; Inst 23: ret
; }} ; }}