Migrate clz, ctz, popcnt, bitrev, is_null, is_invalid on x64 to ISLE. (#3848)

This commit is contained in:
Chris Fallin
2022-02-28 09:45:13 -08:00
committed by GitHub
parent 2a6969d2bd
commit 24f145cd1e
19 changed files with 2812 additions and 1990 deletions

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 9830498351ddf6a3
src/prelude.isle 6b0160bfcac86902
src/isa/aarch64/inst.isle 3678d0a37bdb4cff
src/isa/aarch64/lower.isle 90accbfcadaea46d

View File

@@ -39,8 +39,14 @@ pub trait Context {
fn u8_as_u64(&mut self, arg0: u8) -> u64;
fn u16_as_u64(&mut self, arg0: u16) -> u64;
fn u32_as_u64(&mut self, arg0: u32) -> u64;
fn i64_as_u64(&mut self, arg0: i64) -> u64;
fn u64_add(&mut self, arg0: u64, arg1: u64) -> u64;
fn u64_sub(&mut self, arg0: u64, arg1: u64) -> u64;
fn u64_and(&mut self, arg0: u64, arg1: u64) -> u64;
fn ty_bits(&mut self, arg0: Type) -> u8;
fn ty_bits_u16(&mut self, arg0: Type) -> u16;
fn ty_bits_u64(&mut self, arg0: Type) -> u64;
fn ty_mask(&mut self, arg0: Type) -> u64;
fn ty_bytes(&mut self, arg0: Type) -> u16;
fn lane_type(&mut self, arg0: Type) -> Type;
fn fits_in_16(&mut self, arg0: Type) -> Option<Type>;
@@ -110,13 +116,13 @@ pub trait Context {
fn rotr_opposite_amount(&mut self, arg0: Type, arg1: ImmShift) -> ImmShift;
}
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 363.
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 385.
#[derive(Clone, Debug)]
pub enum SideEffectNoResult {
Inst { inst: MInst },
}
/// Internal type ProducesFlags: defined at src/prelude.isle line 385.
/// Internal type ProducesFlags: defined at src/prelude.isle line 407.
#[derive(Clone, Debug)]
pub enum ProducesFlags {
ProducesFlagsSideEffect { inst: MInst },
@@ -124,7 +130,7 @@ pub enum ProducesFlags {
ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg },
}
/// Internal type ConsumesFlags: defined at src/prelude.isle line 396.
/// Internal type ConsumesFlags: defined at src/prelude.isle line 418.
#[derive(Clone, Debug)]
pub enum ConsumesFlags {
ConsumesFlagsReturnsResultWithProducer {
@@ -140,6 +146,13 @@ pub enum ConsumesFlags {
inst2: MInst,
result: ValueRegs,
},
ConsumesFlagsFourTimesReturnsValueRegs {
inst1: MInst,
inst2: MInst,
inst3: MInst,
inst4: MInst,
result: ValueRegs,
},
}
/// Internal type MInst: defined at src/isa/aarch64/inst.isle line 2.
@@ -1050,7 +1063,7 @@ pub fn constructor_side_effect<C: Context>(
inst: ref pattern1_0,
} = pattern0_0
{
// Rule at src/prelude.isle line 368.
// Rule at src/prelude.isle line 390.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx);
return Some(expr1_0);
@@ -1068,7 +1081,7 @@ pub fn constructor_safepoint<C: Context>(
inst: ref pattern1_0,
} = pattern0_0
{
// Rule at src/prelude.isle line 374.
// Rule at src/prelude.isle line 396.
let expr0_0 = C::emit_safepoint(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx);
return Some(expr1_0);
@@ -1076,6 +1089,55 @@ pub fn constructor_safepoint<C: Context>(
return None;
}
// Generated as internal constructor for term produces_flags_get_reg.
//
// Returns the `result` register carried by a
// `ProducesFlags::ProducesFlagsReturnsReg` value. Nothing is emitted here;
// the instruction inside the producer is only matched, not used. Every other
// `ProducesFlags` variant makes this constructor return `None`.
pub fn constructor_produces_flags_get_reg<C: Context>(
ctx: &mut C,
arg0: &ProducesFlags,
) -> Option<Reg> {
let pattern0_0 = arg0;
if let &ProducesFlags::ProducesFlagsReturnsReg {
inst: ref pattern1_0,
result: pattern1_1,
} = pattern0_0
{
// Rule at src/prelude.isle line 434.
return Some(pattern1_1);
}
// No rule matched: the producer does not carry a plain result register.
return None;
}
// Generated as internal constructor for term produces_flags_ignore.
//
// Re-wraps a result-carrying flag producer as
// `ProducesFlags::ProducesFlagsSideEffect`: the instruction is cloned and the
// `result` register is dropped. Only `ProducesFlagsReturnsReg` and
// `ProducesFlagsReturnsResultWithConsumer` are handled; any other variant
// makes this constructor return `None`.
pub fn constructor_produces_flags_ignore<C: Context>(
ctx: &mut C,
arg0: &ProducesFlags,
) -> Option<ProducesFlags> {
let pattern0_0 = arg0;
match pattern0_0 {
&ProducesFlags::ProducesFlagsReturnsReg {
inst: ref pattern1_0,
result: pattern1_1,
} => {
// Rule at src/prelude.isle line 439.
// `pattern1_1` (the result register) is deliberately left unused.
let expr0_0 = ProducesFlags::ProducesFlagsSideEffect {
inst: pattern1_0.clone(),
};
return Some(expr0_0);
}
&ProducesFlags::ProducesFlagsReturnsResultWithConsumer {
inst: ref pattern1_0,
result: pattern1_1,
} => {
// Rule at src/prelude.isle line 441.
// Same treatment as above: keep the instruction, drop the result.
let expr0_0 = ProducesFlags::ProducesFlagsSideEffect {
inst: pattern1_0.clone(),
};
return Some(expr0_0);
}
_ => {}
}
return None;
}
// Generated as internal constructor for term consumes_flags_concat.
pub fn constructor_consumes_flags_concat<C: Context>(
ctx: &mut C,
@@ -1094,7 +1156,7 @@ pub fn constructor_consumes_flags_concat<C: Context>(
result: pattern3_1,
} = pattern2_0
{
// Rule at src/prelude.isle line 408.
// Rule at src/prelude.isle line 448.
let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs {
inst1: pattern1_0.clone(),
@@ -1124,7 +1186,7 @@ pub fn constructor_with_flags<C: Context>(
inst: ref pattern3_0,
result: pattern3_1,
} => {
// Rule at src/prelude.isle line 433.
// Rule at src/prelude.isle line 473.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_reg(ctx, pattern3_1);
@@ -1135,12 +1197,27 @@ pub fn constructor_with_flags<C: Context>(
inst2: ref pattern3_1,
result: pattern3_2,
} => {
// Rule at src/prelude.isle line 439.
// Rule at src/prelude.isle line 479.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_1);
let expr2_0 = C::emit(ctx, pattern3_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
return Some(pattern3_2);
}
&ConsumesFlags::ConsumesFlagsFourTimesReturnsValueRegs {
inst1: ref pattern3_0,
inst2: ref pattern3_1,
inst3: ref pattern3_2,
inst4: ref pattern3_3,
result: pattern3_4,
} => {
// Rule at src/prelude.isle line 491.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
let expr3_0 = C::emit(ctx, pattern3_2);
let expr4_0 = C::emit(ctx, pattern3_3);
return Some(pattern3_4);
}
_ => {}
}
}
@@ -1154,7 +1231,7 @@ pub fn constructor_with_flags<C: Context>(
result: pattern3_1,
} = pattern2_0
{
// Rule at src/prelude.isle line 427.
// Rule at src/prelude.isle line 467.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1174,7 +1251,7 @@ pub fn constructor_with_flags_reg<C: Context>(
) -> Option<Reg> {
let pattern0_0 = arg0;
let pattern1_0 = arg1;
// Rule at src/prelude.isle line 452.
// Rule at src/prelude.isle line 508.
let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?;
let expr1_0: usize = 0;
let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 9830498351ddf6a3
src/prelude.isle 6b0160bfcac86902
src/isa/s390x/inst.isle d91a16074ab186a8
src/isa/s390x/lower.isle 1cc5a12adc8c75f9

View File

@@ -39,8 +39,14 @@ pub trait Context {
fn u8_as_u64(&mut self, arg0: u8) -> u64;
fn u16_as_u64(&mut self, arg0: u16) -> u64;
fn u32_as_u64(&mut self, arg0: u32) -> u64;
fn i64_as_u64(&mut self, arg0: i64) -> u64;
fn u64_add(&mut self, arg0: u64, arg1: u64) -> u64;
fn u64_sub(&mut self, arg0: u64, arg1: u64) -> u64;
fn u64_and(&mut self, arg0: u64, arg1: u64) -> u64;
fn ty_bits(&mut self, arg0: Type) -> u8;
fn ty_bits_u16(&mut self, arg0: Type) -> u16;
fn ty_bits_u64(&mut self, arg0: Type) -> u64;
fn ty_mask(&mut self, arg0: Type) -> u64;
fn ty_bytes(&mut self, arg0: Type) -> u16;
fn lane_type(&mut self, arg0: Type) -> Type;
fn fits_in_16(&mut self, arg0: Type) -> Option<Type>;
@@ -144,13 +150,13 @@ pub trait Context {
fn same_reg(&mut self, arg0: Reg, arg1: WritableReg) -> Option<()>;
}
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 363.
/// Internal type SideEffectNoResult: defined at src/prelude.isle line 385.
#[derive(Clone, Debug)]
pub enum SideEffectNoResult {
Inst { inst: MInst },
}
/// Internal type ProducesFlags: defined at src/prelude.isle line 385.
/// Internal type ProducesFlags: defined at src/prelude.isle line 407.
#[derive(Clone, Debug)]
pub enum ProducesFlags {
ProducesFlagsSideEffect { inst: MInst },
@@ -158,7 +164,7 @@ pub enum ProducesFlags {
ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg },
}
/// Internal type ConsumesFlags: defined at src/prelude.isle line 396.
/// Internal type ConsumesFlags: defined at src/prelude.isle line 418.
#[derive(Clone, Debug)]
pub enum ConsumesFlags {
ConsumesFlagsReturnsResultWithProducer {
@@ -174,6 +180,13 @@ pub enum ConsumesFlags {
inst2: MInst,
result: ValueRegs,
},
ConsumesFlagsFourTimesReturnsValueRegs {
inst1: MInst,
inst2: MInst,
inst3: MInst,
inst4: MInst,
result: ValueRegs,
},
}
/// Internal type MInst: defined at src/isa/s390x/inst.isle line 2.
@@ -941,7 +954,7 @@ pub fn constructor_side_effect<C: Context>(
inst: ref pattern1_0,
} = pattern0_0
{
// Rule at src/prelude.isle line 368.
// Rule at src/prelude.isle line 390.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx);
return Some(expr1_0);
@@ -959,7 +972,7 @@ pub fn constructor_safepoint<C: Context>(
inst: ref pattern1_0,
} = pattern0_0
{
// Rule at src/prelude.isle line 374.
// Rule at src/prelude.isle line 396.
let expr0_0 = C::emit_safepoint(ctx, pattern1_0);
let expr1_0 = C::output_none(ctx);
return Some(expr1_0);
@@ -967,6 +980,55 @@ pub fn constructor_safepoint<C: Context>(
return None;
}
// Generated as internal constructor for term produces_flags_get_reg.
//
// Returns the `result` register carried by a
// `ProducesFlags::ProducesFlagsReturnsReg` value. Nothing is emitted here;
// the instruction inside the producer is only matched, not used. Every other
// `ProducesFlags` variant makes this constructor return `None`.
pub fn constructor_produces_flags_get_reg<C: Context>(
ctx: &mut C,
arg0: &ProducesFlags,
) -> Option<Reg> {
let pattern0_0 = arg0;
if let &ProducesFlags::ProducesFlagsReturnsReg {
inst: ref pattern1_0,
result: pattern1_1,
} = pattern0_0
{
// Rule at src/prelude.isle line 434.
return Some(pattern1_1);
}
// No rule matched: the producer does not carry a plain result register.
return None;
}
// Generated as internal constructor for term produces_flags_ignore.
//
// Re-wraps a result-carrying flag producer as
// `ProducesFlags::ProducesFlagsSideEffect`: the instruction is cloned and the
// `result` register is dropped. Only `ProducesFlagsReturnsReg` and
// `ProducesFlagsReturnsResultWithConsumer` are handled; any other variant
// makes this constructor return `None`.
pub fn constructor_produces_flags_ignore<C: Context>(
ctx: &mut C,
arg0: &ProducesFlags,
) -> Option<ProducesFlags> {
let pattern0_0 = arg0;
match pattern0_0 {
&ProducesFlags::ProducesFlagsReturnsReg {
inst: ref pattern1_0,
result: pattern1_1,
} => {
// Rule at src/prelude.isle line 439.
// `pattern1_1` (the result register) is deliberately left unused.
let expr0_0 = ProducesFlags::ProducesFlagsSideEffect {
inst: pattern1_0.clone(),
};
return Some(expr0_0);
}
&ProducesFlags::ProducesFlagsReturnsResultWithConsumer {
inst: ref pattern1_0,
result: pattern1_1,
} => {
// Rule at src/prelude.isle line 441.
// Same treatment as above: keep the instruction, drop the result.
let expr0_0 = ProducesFlags::ProducesFlagsSideEffect {
inst: pattern1_0.clone(),
};
return Some(expr0_0);
}
_ => {}
}
return None;
}
// Generated as internal constructor for term consumes_flags_concat.
pub fn constructor_consumes_flags_concat<C: Context>(
ctx: &mut C,
@@ -985,7 +1047,7 @@ pub fn constructor_consumes_flags_concat<C: Context>(
result: pattern3_1,
} = pattern2_0
{
// Rule at src/prelude.isle line 408.
// Rule at src/prelude.isle line 448.
let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs {
inst1: pattern1_0.clone(),
@@ -1015,7 +1077,7 @@ pub fn constructor_with_flags<C: Context>(
inst: ref pattern3_0,
result: pattern3_1,
} => {
// Rule at src/prelude.isle line 433.
// Rule at src/prelude.isle line 473.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_reg(ctx, pattern3_1);
@@ -1026,12 +1088,27 @@ pub fn constructor_with_flags<C: Context>(
inst2: ref pattern3_1,
result: pattern3_2,
} => {
// Rule at src/prelude.isle line 439.
// Rule at src/prelude.isle line 479.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_1);
let expr2_0 = C::emit(ctx, pattern3_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
return Some(pattern3_2);
}
&ConsumesFlags::ConsumesFlagsFourTimesReturnsValueRegs {
inst1: ref pattern3_0,
inst2: ref pattern3_1,
inst3: ref pattern3_2,
inst4: ref pattern3_3,
result: pattern3_4,
} => {
// Rule at src/prelude.isle line 491.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::emit(ctx, pattern3_1);
let expr3_0 = C::emit(ctx, pattern3_2);
let expr4_0 = C::emit(ctx, pattern3_3);
return Some(pattern3_4);
}
_ => {}
}
}
@@ -1045,7 +1122,7 @@ pub fn constructor_with_flags<C: Context>(
result: pattern3_1,
} = pattern2_0
{
// Rule at src/prelude.isle line 427.
// Rule at src/prelude.isle line 467.
let expr0_0 = C::emit(ctx, pattern1_0);
let expr1_0 = C::emit(ctx, pattern3_0);
let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
@@ -1065,7 +1142,7 @@ pub fn constructor_with_flags_reg<C: Context>(
) -> Option<Reg> {
let pattern0_0 = arg0;
let pattern1_0 = arg1;
// Rule at src/prelude.isle line 452.
// Rule at src/prelude.isle line 508.
let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?;
let expr1_0: usize = 0;
let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0);

View File

@@ -159,15 +159,6 @@
(alternative Gpr)
(dst WritableGpr))
;; GPR conditional move with the `OR` of two conditions; overwrites
;; the destination register.
(CmoveOr (size OperandSize)
(cc1 CC)
(cc2 CC)
(consequent GprMem)
(alternative Gpr)
(dst WritableGpr))
;; XMM conditional move; overwrites the destination register.
(XmmCmove (size OperandSize)
(cc CC)
@@ -175,15 +166,6 @@
(alternative Xmm)
(dst WritableXmm))
;; XMM conditional move with the `OR` of two conditions; overwrites
;; the destination register.
(XmmCmoveOr (size OperandSize)
(cc1 CC)
(cc2 CC)
(consequent XmmMem)
(alternative Xmm)
(dst WritableXmm))
;; =========================================
;; Stack manipulation.
@@ -1074,6 +1056,18 @@
;; Feature-gating extractors over `Type`. Each takes no sub-patterns and is
;; implemented externally (in Rust) under the same name.
;; NOTE(review): presumably each one matches only when the corresponding ISA
;; extension is enabled for the target -- confirm against the Rust side.
(decl avx512f_enabled () Type)
(extern extractor avx512f_enabled avx512f_enabled)
(decl avx512bitalg_enabled () Type)
(extern extractor avx512bitalg_enabled avx512bitalg_enabled)
;; Used by the `clz` lowering to select the single-instruction `lzcnt` path.
(decl use_lzcnt () Type)
(extern extractor use_lzcnt use_lzcnt)
(decl use_bmi1 () Type)
(extern extractor use_bmi1 use_bmi1)
(decl use_popcnt () Type)
(extern extractor use_popcnt use_popcnt)
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
;; Extract a constant `Imm8Reg.Imm8` from a value operand.
@@ -1266,6 +1260,13 @@
(xmm_unary_rm_r (SseOpcode.Movdqu)
addr))
;; Materialize a `VCodeConstant` in a freshly allocated XMM register by
;; emitting an `MInst.XmmLoadConst`.
(decl xmm_load_const (Type VCodeConstant) Xmm)
(rule (xmm_load_const ty constant)
      (let ((out WritableXmm (temp_writable_xmm))
            (_ Unit (emit (MInst.XmmLoadConst constant out ty))))
        out))
;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; These constructors create SSA-style `MInst`s. It is their responsibility to
@@ -1398,6 +1399,13 @@
(imm $I64 bits)
(OperandSize.Size64)))
;; Materialize an immediate given as an `i64`. ISLE parses every integer
;; literal as an `i64`, so routing through this helper is what allows rules
;; to write negative immediates; the value is handed to `imm` after an
;; `i64_as_u64` conversion.
(decl imm_i64 (Type i64) Reg)
(rule (imm_i64 ty signed_value)
      (imm ty (i64_as_u64 signed_value)))
(decl nonzero_u64_fits_in_u32 (u64) u64)
(extern extractor nonzero_u64_fits_in_u32 nonzero_u64_fits_in_u32)
@@ -1504,6 +1512,11 @@
(rule (cmp size src1 src2)
(cmp_rmi_r size (CmpOpcode.Cmp) src1 src2))
;; Build a `cmp` whose first source operand is a `u32` immediate (wrapped as
;; `RegMemImm.Imm`) and whose second is a GPR.
(decl cmp_imm (OperandSize u32 Gpr) ProducesFlags)
(rule (cmp_imm size value reg)
      (cmp_rmi_r size (CmpOpcode.Cmp) (RegMemImm.Imm value) reg))
;; Helper for creating `MInst.XmmCmpRmR` instructions.
(decl xmm_cmp_rm_r (SseOpcode XmmMem Xmm) ProducesFlags)
(rule (xmm_cmp_rm_r opcode src1 src2)
@@ -1579,17 +1592,25 @@
(decl cmove_or (Type CC CC GprMem Gpr) ConsumesFlags)
(rule (cmove_or ty cc1 cc2 consequent alternative)
(let ((dst WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.CmoveOr size cc1 cc2 consequent alternative dst)
(tmp WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty))
(cmove1 MInst (MInst.Cmove size cc1 consequent alternative tmp))
(cmove2 MInst (MInst.Cmove size cc2 consequent tmp dst)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
cmove1
cmove2
dst)))
(decl cmove_or_xmm (Type CC CC XmmMem Xmm) ConsumesFlags)
(rule (cmove_or_xmm ty cc1 cc2 consequent alternative)
(let ((dst WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty)))
(ConsumesFlags.ConsumesFlagsReturnsReg
(MInst.XmmCmoveOr size cc1 cc2 consequent alternative dst)
(tmp WritableXmm (temp_writable_xmm))
(size OperandSize (operand_size_of_type_32_64 ty))
(cmove1 MInst (MInst.XmmCmove size cc1 consequent alternative tmp))
(cmove2 MInst (MInst.XmmCmove size cc2 consequent tmp dst)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
cmove1
cmove2
dst)))
;; Helper for creating `cmove_or` instructions directly from values. This allows
@@ -1601,12 +1622,18 @@
(alt ValueRegs alternative)
(dst1 WritableGpr (temp_writable_gpr))
(dst2 WritableGpr (temp_writable_gpr))
(tmp1 WritableGpr (temp_writable_gpr))
(tmp2 WritableGpr (temp_writable_gpr))
(size OperandSize (OperandSize.Size64))
(lower_cmove MInst (MInst.CmoveOr size cc1 cc2 (value_regs_get_gpr cons 0) (value_regs_get_gpr alt 0) dst1))
(upper_cmove MInst (MInst.CmoveOr size cc1 cc2 (value_regs_get_gpr cons 1) (value_regs_get_gpr alt 1) dst2)))
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs
lower_cmove
upper_cmove
;; Lower 64 bits: `tmp1` receives the `cc1` selection, then the second cmove
;; tests `cc2` so `dst1` is the consequent when *either* condition holds.
(cmove1 MInst (MInst.Cmove size cc1 (value_regs_get_gpr cons 0) (value_regs_get_gpr alt 0) tmp1))
(cmove2 MInst (MInst.Cmove size cc2 (value_regs_get_gpr cons 0) tmp1 dst1))
;; Upper 64 bits: the same two-step `cc1`-then-`cc2` selection via `tmp2`.
(cmove3 MInst (MInst.Cmove size cc1 (value_regs_get_gpr cons 1) (value_regs_get_gpr alt 1) tmp2))
(cmove4 MInst (MInst.Cmove size cc2 (value_regs_get_gpr cons 1) tmp2 dst2)))
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs
cmove1
cmove2
cmove3
cmove4
(value_regs dst1 dst2))))
(rule (cmove_or_from_values (is_gpr_type (is_single_register_type ty)) cc1 cc2 consequent alternative)
@@ -1615,6 +1642,14 @@
(rule (cmove_or_from_values (is_xmm_type (is_single_register_type ty)) cc1 cc2 consequent alternative)
(cmove_or_xmm ty cc1 cc2 consequent alternative))
;; Build an `MInst.Setcc` for condition `cc` writing a fresh temporary GPR.
;; The instruction is returned as a flags consumer rather than emitted here.
(decl setcc (CC) ConsumesFlags)
(rule (setcc cc)
      (let ((out WritableGpr (temp_writable_gpr)))
        (ConsumesFlags.ConsumesFlagsReturnsReg
         (MInst.Setcc cc out)
         out)))
;; Helper for creating `MInst.MovzxRmR` instructions.
(decl movzx (Type ExtMode GprMem) Gpr)
(rule (movzx ty mode src)
@@ -2027,6 +2062,16 @@
size))))
dst))
;; Emit a `pshufb` (an `MInst.XmmRmR` with `SseOpcode.Pshufb`) over `src1`
;; and `src2`, returning the fresh XMM register that receives the result.
(decl pshufb (Xmm XmmMem) Xmm)
(rule (pshufb src1 src2)
      (let ((out WritableXmm (temp_writable_xmm))
            (_ Unit (emit (MInst.XmmRmR (SseOpcode.Pshufb) src1 src2 out))))
        out))
;; Helper for creating `MInst.XmmUnaryRmR` instructions.
(decl xmm_unary_rm_r (SseOpcode XmmMem) Xmm)
(rule (xmm_unary_rm_r op src)
@@ -2071,6 +2116,11 @@
(rule (vpabsq src)
(xmm_unary_rm_r_evex (Avx512Opcode.Vpabsq) src))
;; Emit a `vpopcntb` through the EVEX unary helper, using
;; `Avx512Opcode.Vpopcntb`.
(decl vpopcntb (XmmMem) Xmm)
(rule (vpopcntb input)
      (xmm_unary_rm_r_evex (Avx512Opcode.Vpopcntb) input))
;; Helper for creating `MInst.XmmRmREvex` instructions.
(decl xmm_rm_r_evex (Avx512Opcode XmmMem Xmm) Xmm)
(rule (xmm_rm_r_evex op src1 src2)
@@ -2221,6 +2271,70 @@
(rule (ud2 code)
(SideEffectNoResult.Inst (MInst.Ud2 code)))
;; Emit an `lzcnt` (`MInst.UnaryRmR` with `UnaryRmROpcode.Lzcnt`) at the
;; operand size derived from `ty`, returning the fresh destination GPR.
(decl lzcnt (Type Gpr) Gpr)
(rule (lzcnt ty src)
      (let ((out WritableGpr (temp_writable_gpr))
            (width OperandSize (operand_size_of_type_32_64 ty))
            (_ Unit (emit (MInst.UnaryRmR width (UnaryRmROpcode.Lzcnt) src out))))
        out))
;; Emit a `tzcnt` (`MInst.UnaryRmR` with `UnaryRmROpcode.Tzcnt`) at the
;; operand size derived from `ty`, returning the fresh destination GPR.
(decl tzcnt (Type Gpr) Gpr)
(rule (tzcnt ty src)
      (let ((out WritableGpr (temp_writable_gpr))
            (width OperandSize (operand_size_of_type_32_64 ty))
            (_ Unit (emit (MInst.UnaryRmR width (UnaryRmROpcode.Tzcnt) src out))))
        out))
;; Build (without emitting) a `bsr` instruction. It is returned as a
;; `ProducesFlags.ProducesFlagsReturnsReg` so a consumer can use both the
;; flags and the destination register.
(decl bsr (Type Gpr) ProducesFlags)
(rule (bsr ty src)
      (let ((out WritableGpr (temp_writable_gpr))
            (width OperandSize (operand_size_of_type_32_64 ty))
            (scan MInst (MInst.UnaryRmR width (UnaryRmROpcode.Bsr) src out)))
        (ProducesFlags.ProducesFlagsReturnsReg scan out)))
;; Emit a `bsr` followed by a `cmov` on `CC.Z`: the result is the `bsr`
;; output, or `alt` if the input was zero.
(decl bsr_or_else (Type Gpr Gpr) Gpr)
(rule (bsr_or_else ty src alt)
      (let ((scan ProducesFlags (bsr ty src))
            ;; The cmove needs the scan's destination register as an operand,
            ;; so pull it out by hand here and strip it from the producer
            ;; (via `produces_flags_ignore`) before calling `with_flags_reg`.
            (scan_result Gpr (produces_flags_get_reg scan))
            (select ConsumesFlags (cmove ty (CC.Z) alt scan_result)))
        (with_flags_reg (produces_flags_ignore scan) select)))
;; Build (without emitting) a `bsf` instruction. It is returned as a
;; `ProducesFlags.ProducesFlagsReturnsReg` so a consumer can use both the
;; flags and the destination register.
(decl bsf (Type Gpr) ProducesFlags)
(rule (bsf ty src)
      (let ((out WritableGpr (temp_writable_gpr))
            (width OperandSize (operand_size_of_type_32_64 ty))
            (scan MInst (MInst.UnaryRmR width (UnaryRmROpcode.Bsf) src out)))
        (ProducesFlags.ProducesFlagsReturnsReg scan out)))
;; Emit a `bsf` followed by a `cmov` on `CC.Z`: the result is the `bsf`
;; output, or `alt` if the input was zero.
(decl bsf_or_else (Type Gpr Gpr) Gpr)
(rule (bsf_or_else ty src alt)
      (let ((scan ProducesFlags (bsf ty src))
            ;; The cmove needs the scan's destination register as an operand,
            ;; so pull it out by hand here and strip it from the producer
            ;; (via `produces_flags_ignore`) before calling `with_flags_reg`.
            (scan_result Gpr (produces_flags_get_reg scan))
            (select ConsumesFlags (cmove ty (CC.Z) alt scan_result)))
        (with_flags_reg (produces_flags_ignore scan) select)))
;; Emit a `popcnt` (`MInst.UnaryRmR` with `UnaryRmROpcode.Popcnt`) at the
;; operand size derived from `ty`, returning the fresh destination GPR.
(decl x64_popcnt (Type Gpr) Gpr)
(rule (x64_popcnt ty src)
      (let ((out WritableGpr (temp_writable_gpr))
            (width OperandSize (operand_size_of_type_32_64 ty))
            (_ Unit (emit (MInst.UnaryRmR width (UnaryRmROpcode.Popcnt) src out))))
        out))
;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(convert Gpr InstOutput output_gpr)
@@ -2241,6 +2355,8 @@
(convert Reg GprMemImm reg_to_gpr_mem_imm)
(convert WritableGpr WritableReg writable_gpr_to_reg)
(convert WritableGpr Reg writable_gpr_to_r_reg)
(convert WritableGpr GprMem writable_gpr_to_gpr_mem)
(convert WritableGpr ValueRegs writable_gpr_to_value_regs)
(convert Xmm InstOutput output_xmm)
(convert Value Xmm put_in_xmm)
@@ -2259,8 +2375,10 @@
(convert WritableXmm WritableReg writable_xmm_to_reg)
(convert WritableXmm Reg writable_xmm_to_r_reg)
(convert WritableXmm XmmMem writable_xmm_to_xmm_mem)
(convert WritableXmm ValueRegs writable_xmm_to_value_regs)
(convert Gpr Imm8Gpr gpr_to_imm8_gpr)
(convert Imm8Reg Imm8Gpr imm8_reg_to_imm8_gpr)
(convert Amode SyntheticAmode amode_to_synthetic_amode)
(convert SyntheticAmode GprMem synthetic_amode_to_gpr_mem)
@@ -2276,12 +2394,21 @@
;; View a writable GPR as a plain `Reg`.
(decl writable_gpr_to_r_reg (WritableGpr) Reg)
(rule (writable_gpr_to_r_reg wgpr)
      (writable_reg_to_reg (writable_gpr_to_reg wgpr)))

;; View a writable GPR as a `GprMem` operand.
(decl writable_gpr_to_gpr_mem (WritableGpr) GprMem)
(rule (writable_gpr_to_gpr_mem wgpr)
      (gpr_to_gpr_mem wgpr))

;; Wrap a writable GPR in a `ValueRegs` via `value_reg`.
(decl writable_gpr_to_value_regs (WritableGpr) ValueRegs)
(rule (writable_gpr_to_value_regs wgpr)
      (value_reg wgpr))

;; View a writable XMM register as a plain `Reg`.
(decl writable_xmm_to_r_reg (WritableXmm) Reg)
(rule (writable_xmm_to_r_reg wxmm)
      (writable_reg_to_reg (writable_xmm_to_reg wxmm)))

;; View a writable XMM register as an `XmmMem` operand.
(decl writable_xmm_to_xmm_mem (WritableXmm) XmmMem)
(rule (writable_xmm_to_xmm_mem wxmm)
      (xmm_to_xmm_mem (writable_xmm_to_xmm wxmm)))

;; Wrap a writable XMM register in a `ValueRegs` via `value_reg`.
(decl writable_xmm_to_value_regs (WritableXmm) ValueRegs)
(rule (writable_xmm_to_value_regs wxmm)
      (value_reg wxmm))
(decl synthetic_amode_to_gpr_mem (SyntheticAmode) GprMem)
(rule (synthetic_amode_to_gpr_mem amode)

View File

@@ -1104,33 +1104,6 @@ pub(crate) fn emit(
}
}
Inst::CmoveOr {
size,
cc1,
cc2,
consequent,
alternative,
dst,
} => {
let first_cmove = Inst::Cmove {
cc: *cc1,
size: *size,
consequent: consequent.clone(),
alternative: alternative.clone(),
dst: dst.clone(),
};
first_cmove.emit(sink, info, state);
let second_cmove = Inst::Cmove {
cc: *cc2,
size: *size,
consequent: consequent.clone(),
alternative: alternative.clone(),
dst: dst.clone(),
};
second_cmove.emit(sink, info, state);
}
Inst::XmmCmove {
size,
cc,
@@ -1159,39 +1132,6 @@ pub(crate) fn emit(
sink.bind_label(next);
}
Inst::XmmCmoveOr {
size,
cc1,
cc2,
consequent,
alternative,
dst,
} => {
debug_assert_eq!(*alternative, dst.to_reg());
let op = if *size == OperandSize::Size64 {
SseOpcode::Movsd
} else {
SseOpcode::Movss
};
let second_test = sink.get_label();
let next_instruction = sink.get_label();
// Jump to second test if `cc1` is *not* set.
one_way_jmp(sink, cc1.invert(), next_instruction);
let inst =
Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
inst.emit(sink, info, state);
sink.bind_label(second_test);
// Jump to next instruction if `cc2` is *not* set.
one_way_jmp(sink, cc2.invert(), next_instruction);
let inst =
Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg());
inst.emit(sink, info, state);
sink.bind_label(next_instruction);
}
Inst::Push64 { src } => {
if info.flags.enable_probestack() {
sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow);

View File

@@ -52,7 +52,6 @@ impl Inst {
| Inst::CallUnknown { .. }
| Inst::CheckedDivOrRemSeq { .. }
| Inst::Cmove { .. }
| Inst::CmoveOr { .. }
| Inst::CmpRmiR { .. }
| Inst::CvtFloatToSintSeq { .. }
| Inst::CvtFloatToUintSeq { .. }
@@ -89,7 +88,6 @@ impl Inst {
| Inst::Ud2 { .. }
| Inst::VirtualSPOffsetAdj { .. }
| Inst::XmmCmove { .. }
| Inst::XmmCmoveOr { .. }
| Inst::XmmCmpRmR { .. }
| Inst::XmmLoadConst { .. }
| Inst::XmmMinMaxSeq { .. }
@@ -141,6 +139,7 @@ impl Inst {
}
}
#[allow(dead_code)]
pub(crate) fn unary_rm_r(
size: OperandSize,
op: UnaryRmROpcode,
@@ -906,12 +905,6 @@ impl Inst {
alternative,
dst,
..
}
| Inst::CmoveOr {
size,
alternative,
dst,
..
} => {
if *alternative != dst.to_reg() {
debug_assert!(alternative.is_virtual());
@@ -926,9 +919,6 @@ impl Inst {
}
Inst::XmmCmove {
alternative, dst, ..
}
| Inst::XmmCmoveOr {
alternative, dst, ..
} => {
if *alternative != dst.to_reg() {
debug_assert!(alternative.is_virtual());
@@ -1619,27 +1609,6 @@ impl PrettyPrint for Inst {
show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes())
),
Inst::CmoveOr {
size,
cc1,
cc2,
consequent: src,
alternative: _,
dst,
} => {
let src = src.show_rru_sized(mb_rru, size.to_bytes());
let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes());
format!(
"{} {}, {}; {} {}, {}",
ljustify(format!("cmov{}{}", cc1.to_string(), suffix_bwlq(*size))),
src,
dst,
ljustify(format!("cmov{}{}", cc2.to_string(), suffix_bwlq(*size))),
src,
dst,
)
}
Inst::XmmCmove {
size,
cc,
@@ -1660,34 +1629,6 @@ impl PrettyPrint for Inst {
)
}
Inst::XmmCmoveOr {
size,
cc1,
cc2,
consequent: src,
dst,
..
} => {
let suffix = if *size == OperandSize::Size64 {
"sd"
} else {
"ss"
};
let src = src.show_rru_sized(mb_rru, size.to_bytes());
let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes());
format!(
"j{} $check; mov{} {}, {}; $check: j{} $next; mov{} {}, {}; $next",
cc1.invert().to_string(),
suffix,
src,
dst,
cc2.invert().to_string(),
suffix,
src,
dst,
)
}
Inst::Push64 { src } => {
format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru))
}
@@ -2086,11 +2027,6 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
consequent: src,
dst,
..
}
| Inst::CmoveOr {
consequent: src,
dst,
..
} => {
src.get_regs_as_uses(collector);
collector.add_mod(dst.to_writable_reg());
@@ -2099,11 +2035,6 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
consequent: src,
dst,
..
}
| Inst::XmmCmoveOr {
consequent: src,
dst,
..
} => {
src.get_regs_as_uses(collector);
collector.add_mod(dst.to_writable_reg());
@@ -2554,12 +2485,6 @@ pub(crate) fn x64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
ref mut dst,
ref mut alternative,
..
}
| Inst::CmoveOr {
consequent: ref mut src,
ref mut dst,
ref mut alternative,
..
} => {
src.map_uses(mapper);
dst.map_mod(mapper);
@@ -2570,12 +2495,6 @@ pub(crate) fn x64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
ref mut dst,
ref mut alternative,
..
}
| Inst::XmmCmoveOr {
consequent: ref mut src,
ref mut dst,
ref mut alternative,
..
} => {
src.map_uses(mapper);
dst.map_mod(mapper);

View File

@@ -1467,22 +1467,22 @@
;; - `CC.BE -> C = 1 OR Z = 1` (below or equal)
;; - `CC.NBE -> C = 0 AND Z = 0` (not below or equal)
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Ordered) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.Ordered) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NP) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Unordered) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.Unordered) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.P) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThan) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.GreaterThan) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NBE) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThanOrEqual) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.GreaterThanOrEqual) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.NB) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThan) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrLessThan) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.B) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b) x y)))
(with_flags (fpcmp b a) (cmove_from_values ty (CC.BE) x y)))
;; Certain FloatCC variants are implemented by flipping the operands of the
@@ -1496,16 +1496,16 @@
;; not `LT | UNO`. By flipping the operands AND inverting the comparison (e.g.,
;; to `CC.NBE`), we also avoid these unordered cases.
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThan) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.LessThan) a b) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.NBE) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThanOrEqual) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.LessThanOrEqual) a b) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.NB) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThan) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrGreaterThan) a b) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.B) x y)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b) x y)))
(with_flags (fpcmp a b) (cmove_from_values ty (CC.BE) x y)))
;; `FloatCC.Equal` and `FloatCC.NotEqual` can only be implemented with multiple
@@ -1521,8 +1521,341 @@
;; More details about the CLIF semantics for `fcmp` are available at
;; https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html#method.fcmp.
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Equal) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.Equal) a b) x y)))
(with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) y x)))
(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.NotEqual) a b)) x y)))
(rule (lower (has_type ty (select (fcmp (FloatCC.NotEqual) a b) x y)))
(with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y)))
;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; If available, we can use a plain lzcnt instruction here. Note no
;; special handling is required for zero inputs, because the machine
;; instruction does what the CLIF expects for zero, i.e. it returns
;; the operand width in bits.
;; 32/64-bit `clz` when the `use_lzcnt` extractor matches: a single `lzcnt`.
;; Priority 1 places this ahead of the generic rule that follows.
(rule 1 (lower (has_type (and (ty_32_or_64 int_ty)
                              (use_lzcnt))
                         (clz val)))
      (lzcnt int_ty val))

;; Generic 32/64-bit case: hand off to `do_clz`, using the value's own type
;; both as the operating width and as the source of the bit count.
(rule (lower (has_type (ty_32_or_64 int_ty)
                       (clz val)))
      (do_clz int_ty int_ty val))

;; 8/16-bit case: zero-extend to 32 bits and run `do_clz` at `$I32`, passing
;; the narrow type separately as the second argument.
(rule (lower (has_type (ty_8_or_16 narrow_ty)
                       (clz val)))
      (do_clz $I32 narrow_ty (extend_to_gpr val $I32 (ExtendKind.Zero))))
;; 128-bit `clz`, assembled from `do_clz` on each 64-bit register of `src`.
(rule (lower (has_type $I128
                       (clz src)))
      (let (;; Count for register 1 (the upper half).
            (hi_zeros Gpr (do_clz $I64 $I64 (value_regs_get_gpr src 1)))
            ;; Count for register 0 (the lower half), plus 64.
            (lo_zeros_plus_64 Gpr (add $I64
                                       (do_clz $I64 $I64 (value_regs_get_gpr src 0))
                                       (RegMemImm.Imm 64)))
            ;; Compare the upper count against 64: when it differs (`NZ`) the
            ;; upper count is the answer, otherwise the offset lower count is.
            (count Gpr
                   (with_flags_reg
                    (cmp_imm (OperandSize.Size64) 64 hi_zeros)
                    (cmove $I64 (CC.NZ) hi_zeros lo_zeros_plus_64))))
        ;; The second result register is set to zero.
        (value_regs count (imm $I64 0))))
;; Implementation helper for clz; operates on 32 or 64-bit units.
;;
;; `ty` is the width the emitted instructions operate at; `orig_ty` supplies
;; the bit count that the result is measured against.
(decl do_clz (Type Type Gpr) Gpr)
(rule (do_clz ty orig_ty src)
      (let (;; Result of `bsr_or_else`, with -1 supplied as the fallback value.
            (top_bit Reg (bsr_or_else ty src (imm_i64 $I64 -1)))
            ;; bits(orig_ty) - 1, computed at compile time.
            (max_index Reg (imm ty (u64_sub (ty_bits_u64 orig_ty) 1))))
        ;; clz = (bits - 1) - top_bit.
        (sub ty max_index top_bit)))
;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Analogous to `clz` cases above, but using mirror instructions
;; (tzcnt vs lzcnt, bsf vs bsr).
;; 32/64-bit `ctz` when the `use_bmi1` extractor matches: a single `tzcnt`.
;; Priority 1 places this ahead of the generic rule that follows.
(rule 1 (lower (has_type (and (ty_32_or_64 int_ty)
                              (use_bmi1))
                         (ctz val)))
      (tzcnt int_ty val))

;; Generic 32/64-bit case: hand off to `do_ctz` at the value's own type.
(rule (lower (has_type (ty_32_or_64 int_ty)
                       (ctz val)))
      (do_ctz int_ty int_ty val))

;; 8/16-bit case: zero-extend to 32 bits and run `do_ctz` at `$I32`, passing
;; the narrow type separately as the second argument.
(rule (lower (has_type (ty_8_or_16 narrow_ty)
                       (ctz val)))
      (do_ctz $I32 narrow_ty (extend_to_gpr val $I32 (ExtendKind.Zero))))
;; 128-bit `ctz`, assembled from `do_ctz` on each 64-bit register of `src`.
(rule (lower (has_type $I128
                       (ctz src)))
      (let (;; Count for register 0 (the lower half).
            (lo_zeros Gpr (do_ctz $I64 $I64 (value_regs_get_gpr src 0)))
            ;; Count for register 1 (the upper half), plus 64.
            (hi_zeros_plus_64 Gpr (add $I64
                                       (do_ctz $I64 $I64 (value_regs_get_gpr src 1))
                                       (RegMemImm.Imm 64)))
            ;; Compare the lower count against 64: when equal (`Z`) the offset
            ;; upper count is the answer, otherwise the lower count is.
            (count Gpr
                   (with_flags_reg
                    (cmp_imm (OperandSize.Size64) 64 lo_zeros)
                    (cmove $I64 (CC.Z) hi_zeros_plus_64 lo_zeros))))
        ;; The second result register is set to zero.
        (value_regs count (imm $I64 0))))
;; Implementation helper for ctz: `bsf_or_else` at width `ty`, with the bit
;; width of `orig_ty` supplied as the fallback value.
(decl do_ctz (Type Type Gpr) Gpr)
(rule (do_ctz ty orig_ty val)
      (bsf_or_else ty
                   val
                   (imm $I64 (ty_bits_u64 orig_ty))))
;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The three priority-1 rules below apply when the `use_popcnt` extractor
;; matches, and lower straight to `x64_popcnt`.

;; 32/64-bit: one instruction at the value's own width.
(rule 1 (lower (has_type (and (ty_32_or_64 int_ty)
                              (use_popcnt))
                         (popcnt val)))
      (x64_popcnt int_ty val))

;; 8/16-bit: zero-extend to 32 bits first, then count at `$I32`.
(rule 1 (lower (has_type (and (ty_8_or_16 narrow_ty)
                              (use_popcnt))
                         (popcnt val)))
      (x64_popcnt $I32 (extend_to_gpr val $I32 (ExtendKind.Zero))))

;; 128-bit: count each 64-bit register and add the two counts; the second
;; result register is zero.
(rule 1 (lower (has_type (and $I128
                              (use_popcnt))
                         (popcnt val)))
      (let ((low_half_bits Gpr (x64_popcnt $I64 (value_regs_get_gpr val 0)))
            (high_half_bits Gpr (x64_popcnt $I64 (value_regs_get_gpr val 1))))
        (value_regs (add $I64 low_half_bits high_half_bits)
                    (imm $I64 0))))
;; Default-priority `popcnt` rules: same shape as the native-instruction
;; rules, but routed through the `do_popcnt` helper.

;; 32/64-bit: count at the value's own width.
(rule (lower (has_type (ty_32_or_64 int_ty)
                       (popcnt val)))
      (do_popcnt int_ty val))

;; 8/16-bit: zero-extend to 32 bits first, then count at `$I32`.
(rule (lower (has_type (ty_8_or_16 narrow_ty)
                       (popcnt val)))
      (do_popcnt $I32 (extend_to_gpr val $I32 (ExtendKind.Zero))))

;; 128-bit: count each 64-bit register and add the two counts; the second
;; result register is zero.
(rule (lower (has_type $I128
                       (popcnt val)))
      (let ((low_half_bits Gpr (do_popcnt $I64 (value_regs_get_gpr val 0)))
            (high_half_bits Gpr (do_popcnt $I64 (value_regs_get_gpr val 1))))
        (value_regs (add $I64 low_half_bits high_half_bits)
                    (imm $I64 0))))
;; Implementation of popcount when we don't have a native popcount
;; instruction.
;; Population count built from shifts, masks, subtracts and one multiply.
;; This rule handles `$I64`; a separate rule handles `$I32`.
(decl do_popcnt (Type Gpr) Gpr)
(rule (do_popcnt $I64 src)
(let ((shifted1 Gpr (shr $I64 src (Imm8Reg.Imm8 1)))
(sevens Gpr (imm $I64 0x7777777777777777))
(masked1 Gpr (x64_and $I64 shifted1 sevens))
;; diff1 := src - ((src >> 1) & 0b0111_0111_0111...)
(diff1 Gpr (sub $I64 src masked1))
(shifted2 Gpr (shr $I64 masked1 (Imm8Reg.Imm8 1)))
(masked2 Gpr (x64_and $I64 shifted2 sevens))
;; diff2 := diff1 - ((masked1 >> 1) & 0b0111_0111_0111...)
;;
;; Note that the value shifted here is the previous masked term
;; (`masked1`), not the running difference.
(diff2 Gpr (sub $I64 diff1 masked2))
(shifted3 Gpr (shr $I64 masked2 (Imm8Reg.Imm8 1)))
(masked3 Gpr (x64_and $I64 shifted3 sevens))
;; diff3 := diff2 - ((masked2 >> 1) & 0b0111_0111_0111...)
;;
;; At this point, each nibble of diff3 is the popcount of
;; that nibble. This works because at each step above, we
;; are basically subtracting floor(value / 2) from the
;; running value; the leftover remainder is 1 if the LSB
;; was 1. After three steps, we have (nibble / 8) -- 0 or
;; 1 for the MSB of the nibble -- plus three possible
;; additions for the three other bits.
(diff3 Gpr (sub $I64 diff2 masked3))
;; Add the two nibbles of each byte together.
(sum1 Gpr (add $I64
(shr $I64 diff3 (Imm8Reg.Imm8 4))
diff3))
;; Mask the above sum to have the popcount for each byte
;; in the lower nibble of that byte.
(ofof Gpr (imm $I64 0x0f0f0f0f0f0f0f0f))
(masked4 Gpr (x64_and $I64 sum1 ofof))
(ones Gpr (imm $I64 0x0101010101010101))
;; Use a multiply to sum all of the bytes' popcounts into
;; the top byte. Consider the binomial expansion for the
;; top byte: it is the sum of the bytes (masked4 >> 56) *
;; 0x01 + (masked4 >> 48) * 0x01 + (masked4 >> 40) * 0x01
;; + ... + (masked4 >> 0).
(mul Gpr (mul $I64 masked4 ones))
;; Now take that top byte and return it as the popcount.
(final Gpr (shr $I64 mul (Imm8Reg.Imm8 56))))
final))
;; This is the 32-bit version of the above; the steps for each nibble
;; are the same, we just use constants half as wide.
(rule (do_popcnt $I32 src)
(let ((shifted1 Gpr (shr $I32 src (Imm8Reg.Imm8 1)))
(sevens Gpr (imm $I32 0x77777777))
(masked1 Gpr (x64_and $I32 shifted1 sevens))
;; diff1 := src - ((src >> 1) & 0x77777777)
(diff1 Gpr (sub $I32 src masked1))
(shifted2 Gpr (shr $I32 masked1 (Imm8Reg.Imm8 1)))
(masked2 Gpr (x64_and $I32 shifted2 sevens))
;; diff2 := diff1 - ((masked1 >> 1) & 0x77777777)
(diff2 Gpr (sub $I32 diff1 masked2))
(shifted3 Gpr (shr $I32 masked2 (Imm8Reg.Imm8 1)))
(masked3 Gpr (x64_and $I32 shifted3 sevens))
;; diff3 := diff2 - ((masked2 >> 1) & 0x77777777)
(diff3 Gpr (sub $I32 diff2 masked3))
;; Add the two nibbles of each byte together.
(sum1 Gpr (add $I32
(shr $I32 diff3 (Imm8Reg.Imm8 4))
diff3))
;; Here the nibble mask and the multiplier are passed as immediates
;; rather than being loaded into registers first.
(masked4 Gpr (x64_and $I32 sum1 (RegMemImm.Imm 0x0f0f0f0f)))
(mul Gpr (mul $I32 masked4 (RegMemImm.Imm 0x01010101)))
;; The top byte of the 32-bit product is shifted down and returned.
(final Gpr (shr $I32 mul (Imm8Reg.Imm8 24))))
final))
;; `$I8X16` popcount when both the `avx512vl_enabled` and
;; `avx512bitalg_enabled` extractors match: a single `vpopcntb`.
(rule 1 (lower (has_type (and $I8X16
                              (avx512vl_enabled)
                              (avx512bitalg_enabled))
                         (popcnt vec)))
      (vpopcntb vec))
;; For SSE 4.2 we use Mula's algorithm (https://arxiv.org/pdf/1611.07612.pdf):
;;
;; __m128i count_bytes ( __m128i v) {
;; __m128i lookup = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
;; __m128i low_mask = _mm_set1_epi8 (0x0f);
;; __m128i lo = _mm_and_si128 (v, low_mask);
;; __m128i hi = _mm_and_si128 (_mm_srli_epi16 (v, 4), low_mask);
;; __m128i cnt1 = _mm_shuffle_epi8 (lookup, lo);
;; __m128i cnt2 = _mm_shuffle_epi8 (lookup, hi);
;; return _mm_add_epi8 (cnt1, cnt2);
;; }
;;
;; Details of the above algorithm can be found in the reference noted above, but the basics
;; are to create a lookup table that pre populates the popcnt values for each number [0,15].
;; The algorithm uses shifts to isolate 4 bit sections of the vector, pshufb as part of the
;; lookup process, and adds together the results.
;;
;; __m128i lookup = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
;; Constant holding the bits-per-nibble table (`lookup` in the C sketch above).
(decl popcount_4bit_table () VCodeConstant)
(extern constructor popcount_4bit_table popcount_4bit_table)
;; Constant holding the mask for low nibbles: the byte 0x0f repeated 16 times.
(decl popcount_low_mask () VCodeConstant)
(extern constructor popcount_low_mask popcount_low_mask)
;; Default-priority `$I8X16` popcount: split every byte into its low and high
;; nibble, look each nibble up in the bits-per-nibble table with `pshufb`,
;; and add the two per-byte counts.
(rule (lower (has_type $I8X16
                       (popcnt src)))
      (let ((nibble_table_const VCodeConstant (popcount_4bit_table))
            (low_mask Xmm (xmm_load_const $I8X16 (popcount_low_mask)))
            (low_nibbles Xmm (sse_and $I8X16 src low_mask))
            ;; Note that this is a 16x8 shift, but that's OK; we mask
            ;; off anything that traverses from one byte to the next
            ;; with the low_mask below.
            (shifted_src Xmm (psrlw src (RegMemImm.Imm 4)))
            (high_nibbles Xmm (sse_and $I8X16 shifted_src low_mask))
            ;; Load the table through the constant bound at the top of this
            ;; `let` instead of invoking `popcount_4bit_table` a second time.
            (lookup Xmm (xmm_load_const $I8X16 nibble_table_const))
            (bit_counts_low Xmm (pshufb lookup low_nibbles))
            (bit_counts_high Xmm (pshufb lookup high_nibbles)))
        (paddb bit_counts_low bit_counts_high)))
;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; 8-, 16- and 32-bit values are all processed at `$I32` operating width;
;; only the helper that is invoked differs.
(rule (lower (has_type $I8 (bitrev val)))
      (do_bitrev8 $I32 val))

(rule (lower (has_type $I16 (bitrev val)))
      (do_bitrev16 $I32 val))

(rule (lower (has_type $I32 (bitrev val)))
      (do_bitrev32 $I32 val))

(rule (lower (has_type $I64 (bitrev val)))
      (do_bitrev64 $I64 val))

;; 128-bit: reverse each 64-bit register and exchange them, so result
;; register 0 comes from source register 1 and vice versa.
(rule (lower (has_type $I128 (bitrev val)))
      (value_regs
       (do_bitrev64 $I64 (value_regs_get_gpr val 1))
       (do_bitrev64 $I64 (value_regs_get_gpr val 0))))
;; Reverse the bit order inside every byte of `src`, operating at width `ty`.
;;
;; Three swap stages are emitted: adjacent bits, adjacent bit pairs, then
;; adjacent nibbles. Each stage's 64-bit mask pattern is truncated to the
;; width of `ty` via `ty_mask`.
(decl do_bitrev8 (Type Gpr) Gpr)
(rule (do_bitrev8 ty src)
(let ((tymask u64 (ty_mask ty))
;; Stage 1: exchange each even-numbered bit with its odd neighbor.
(mask1 Gpr (imm ty (u64_and tymask 0x5555555555555555)))
(lo1 Gpr (x64_and ty src mask1))
(hi1 Gpr (x64_and ty (shr ty src (Imm8Reg.Imm8 1)) mask1))
(swap1 Gpr (or ty
(shl ty lo1 (Imm8Reg.Imm8 1))
hi1))
;; Stage 2: exchange adjacent 2-bit groups.
(mask2 Gpr (imm ty (u64_and tymask 0x3333333333333333)))
(lo2 Gpr (x64_and ty swap1 mask2))
(hi2 Gpr (x64_and ty (shr ty swap1 (Imm8Reg.Imm8 2)) mask2))
(swap2 Gpr (or ty
(shl ty lo2 (Imm8Reg.Imm8 2))
hi2))
;; Stage 3: exchange the two nibbles of each byte.
(mask4 Gpr (imm ty (u64_and tymask 0x0f0f0f0f0f0f0f0f)))
(lo4 Gpr (x64_and ty swap2 mask4))
(hi4 Gpr (x64_and ty (shr ty swap2 (Imm8Reg.Imm8 4)) mask4))
(swap4 Gpr (or ty
(shl ty lo4 (Imm8Reg.Imm8 4))
hi4)))
swap4))
;; Reverse the bit order inside every 16-bit unit of `src`, operating at
;; width `ty`: run `do_bitrev8`, then exchange the two bytes of each unit.
(decl do_bitrev16 (Type Gpr) Gpr)
(rule (do_bitrev16 ty src)
(let ((src_ Gpr (do_bitrev8 ty src))
(tymask u64 (ty_mask ty))
;; Mask selecting the low byte of each 16-bit unit, truncated to `ty`.
(mask8 Gpr (imm ty (u64_and tymask 0x00ff00ff00ff00ff)))
(lo8 Gpr (x64_and ty src_ mask8))
(hi8 Gpr (x64_and ty (shr ty src_ (Imm8Reg.Imm8 8)) mask8))
(swap8 Gpr (or ty
(shl ty lo8 (Imm8Reg.Imm8 8))
hi8)))
swap8))
;; Reverse the bit order inside every 32-bit unit of `src`, operating at
;; width `ty`: run `do_bitrev16`, then exchange the two 16-bit halves of
;; each unit.
(decl do_bitrev32 (Type Gpr) Gpr)
(rule (do_bitrev32 ty src)
(let ((src_ Gpr (do_bitrev16 ty src))
(tymask u64 (ty_mask ty))
;; Mask selecting the low 16 bits of each 32-bit unit, truncated to `ty`.
(mask16 Gpr (imm ty (u64_and tymask 0x0000ffff0000ffff)))
(lo16 Gpr (x64_and ty src_ mask16))
(hi16 Gpr (x64_and ty (shr ty src_ (Imm8Reg.Imm8 16)) mask16))
(swap16 Gpr (or ty
(shl ty lo16 (Imm8Reg.Imm8 16))
hi16)))
swap16))
;; Reverse the bit order of a 64-bit value: run `do_bitrev32`, then exchange
;; the two 32-bit halves. The rule only matches when the type is `$I64`.
(decl do_bitrev64 (Type Gpr) Gpr)
(rule (do_bitrev64 ty @ $I64 src)
(let ((src_ Gpr (do_bitrev32 ty src))
(mask32 Gpr (imm ty 0xffffffff))
(lo32 Gpr (x64_and ty src_ mask32))
;; Unlike the narrower helpers, the shifted-down half is not masked.
(hi32 Gpr (shr ty src_ (Imm8Reg.Imm8 32)))
(swap32 Gpr (or ty
(shl ty lo32 (Imm8Reg.Imm8 32))
hi32)))
swap32))
;; Rules for `is_null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Null references are represented by the constant value `0`.
;; Compare the 64-bit reference against 0 and produce the `Z` condition via
;; `setcc`.
(rule (lower (is_null ref @ (value_type $R64)))
      (with_flags (cmp_imm (OperandSize.Size64) 0 ref)
                  (setcc (CC.Z))))
;; Rules for `is_invalid` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Invalid references are represented by the constant value `-1`.
;; Compare the 64-bit reference against -1 and produce the `Z` condition via
;; `setcc`.
(rule (lower (is_invalid ref @ (value_type $R64)))
      (with_flags
       ;; simm32 0xffff_ffff is sign-extended to -1.
       (cmp_imm (OperandSize.Size64) 0xffffffff ref)
       (setcc (CC.Z))))

File diff suppressed because it is too large Load Diff

View File

@@ -171,6 +171,42 @@ where
}
}
/// Returns `Some(())` when the `use_avx512bitalg_simd` ISA flag is set and
/// `None` otherwise; the `Type` argument is ignored.
#[inline]
fn avx512bitalg_enabled(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_avx512bitalg_simd().then(|| ())
}
/// Returns `Some(())` when the `use_lzcnt` ISA flag is set and `None`
/// otherwise; the `Type` argument is ignored.
#[inline]
fn use_lzcnt(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_lzcnt().then(|| ())
}
/// Returns `Some(())` when the `use_bmi1` ISA flag is set and `None`
/// otherwise; the `Type` argument is ignored.
#[inline]
fn use_bmi1(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_bmi1().then(|| ())
}
/// Returns `Some(())` when the `use_popcnt` ISA flag is set and `None`
/// otherwise; the `Type` argument is ignored.
#[inline]
fn use_popcnt(&mut self, _: Type) -> Option<()> {
    self.isa_flags.use_popcnt().then(|| ())
}
#[inline]
fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
@@ -326,6 +362,16 @@ where
SyntheticAmode::ConstantOffset(mask_table)
}
/// Wraps `POPCOUNT_4BIT_TABLE` as well-known constant data, passes it to
/// `lower_ctx.use_constant`, and returns the resulting `VCodeConstant`.
fn popcount_4bit_table(&mut self) -> VCodeConstant {
    let table = VCodeConstantData::WellKnown(&POPCOUNT_4BIT_TABLE);
    self.lower_ctx.use_constant(table)
}
/// Wraps `POPCOUNT_LOW_MASK` as well-known constant data, passes it to
/// `lower_ctx.use_constant`, and returns the resulting `VCodeConstant`.
fn popcount_low_mask(&mut self) -> VCodeConstant {
    let mask = VCodeConstantData::WellKnown(&POPCOUNT_LOW_MASK);
    self.lower_ctx.use_constant(mask)
}
#[inline]
fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm {
Writable::from_reg(Xmm::new(r.to_reg()).unwrap())
@@ -499,6 +545,18 @@ const I8X16_USHR_MASKS: [u8; 128] = [
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];
/// Population count of every 4-bit value: entry `i` holds the number of bits
/// set in the nibble `i`. Used in the vector implementation of popcount.
#[rustfmt::skip] // Keep one row per value of the upper two index bits.
const POPCOUNT_4BIT_TABLE: [u8; 16] = [
    0, 1, 1, 2, // 0b00xx
    1, 2, 2, 3, // 0b01xx
    1, 2, 2, 3, // 0b10xx
    2, 3, 3, 4, // 0b11xx
];
/// Sixteen bytes, each with only its low four bits set. Used as the nibble
/// mask in the vector implementation of popcount.
const POPCOUNT_LOW_MASK: [u8; 16] = [0b0000_1111; 16];
#[inline]
fn to_simm32(constant: i64) -> Option<GprMemImm> {
if constant == ((constant << 32) >> 32) {

View File

@@ -1,4 +1,4 @@
src/clif.isle 9ea75a6f790b5c03
src/prelude.isle 9830498351ddf6a3
src/isa/x64/inst.isle 5ee89205e6e9a46b
src/isa/x64/lower.isle 348a808ea5de4cdb
src/prelude.isle 6b0160bfcac86902
src/isa/x64/inst.isle 67eb719e568c2a81
src/isa/x64/lower.isle 142626fe062fd7d7

File diff suppressed because it is too large Load Diff

View File

@@ -140,6 +140,26 @@ macro_rules! isle_prelude_methods {
x.into()
}
/// Converts an `i64` to `u64` with an `as` cast, i.e. the two's-complement
/// bit pattern is kept unchanged (negative inputs become large values).
#[inline]
fn i64_as_u64(&mut self, x: i64) -> u64 {
x as u64
}
/// Adds two `u64` values, wrapping around on overflow instead of panicking.
#[inline]
fn u64_add(&mut self, x: u64, y: u64) -> u64 {
    u64::wrapping_add(x, y)
}
/// Subtracts `y` from `x`, wrapping around on underflow instead of panicking.
#[inline]
fn u64_sub(&mut self, x: u64, y: u64) -> u64 {
    u64::wrapping_sub(x, y)
}
/// Bitwise AND of two `u64` values.
#[inline]
fn u64_and(&mut self, x: u64, y: u64) -> u64 {
x & y
}
#[inline]
fn ty_bits(&mut self, ty: Type) -> u8 {
use std::convert::TryInto;
@@ -151,11 +171,28 @@ macro_rules! isle_prelude_methods {
ty.bits()
}
/// Bit width of `ty` (as reported by `ty.bits()`), widened to `u64`.
#[inline]
fn ty_bits_u64(&mut self, ty: Type) -> u64 {
    u64::from(ty.bits())
}
/// Byte width of `ty` (as reported by `ty.bytes()`) as a `u16`.
///
/// # Panics
///
/// Panics if the byte count does not fit in a `u16`.
#[inline]
fn ty_bytes(&mut self, ty: Type) -> u16 {
    let byte_count = ty.bytes();
    u16::try_from(byte_count).unwrap()
}
/// All-ones mask covering the low `ty.bits()` bits of a `u64`.
///
/// # Panics
///
/// Only widths of 1, 8, 16, 32 and 64 bits are handled; any other width
/// reaches `unimplemented!()`.
#[inline]
fn ty_mask(&mut self, ty: Type) -> u64 {
    match ty.bits() {
        1 => 1,
        8 => u64::from(u8::MAX),
        16 => u64::from(u16::MAX),
        32 => u64::from(u32::MAX),
        64 => u64::MAX,
        _ => unimplemented!(),
    }
}
fn fits_in_16(&mut self, ty: Type) -> Option<Type> {
if ty.bits() <= 16 {
Some(ty)

View File

@@ -167,6 +167,20 @@
(decl u32_as_u64 (u32) u64)
(extern constructor u32_as_u64 u32_as_u64)
;; Convert an `i64` to a `u64`; implemented externally in Rust.
(decl i64_as_u64 (i64) u64)
(extern constructor i64_as_u64 i64_as_u64)
;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Compile-time `u64` addition; implemented externally in Rust.
(decl u64_add (u64 u64) u64)
(extern constructor u64_add u64_add)
;; Compile-time `u64` subtraction (first argument minus second); implemented
;; externally in Rust.
(decl u64_sub (u64 u64) u64)
(extern constructor u64_sub u64_sub)
;; Compile-time `u64` bitwise AND; implemented externally in Rust.
(decl u64_and (u64 u64) u64)
(extern constructor u64_and u64_and)
;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(extern const $B1 Type)
@@ -209,6 +223,14 @@
(decl ty_bits_u16 (Type) u16)
(extern constructor ty_bits_u16 ty_bits_u16)
;; Get the bit width of a given type, as a `u64`.
(decl ty_bits_u64 (Type) u64)
(extern constructor ty_bits_u64 ty_bits_u64)
;; Get a mask for the width of a given type: a `u64` with one bit set for
;; every bit of the type (e.g. 0xff for an 8-bit type).
(decl ty_mask (Type) u64)
(extern constructor ty_mask ty_mask)
;; Get the byte width of a given type.
(decl ty_bytes (Type) u16)
(extern constructor ty_bytes ty_bytes)
@@ -398,9 +420,27 @@
(ConsumesFlagsReturnsReg (inst MInst) (result Reg))
(ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst)
(inst2 MInst)
(result ValueRegs))))
(result ValueRegs))
(ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst)
(inst2 MInst)
(inst3 MInst)
(inst4 MInst)
(result ValueRegs))))
;; Get the produced register out of a ProducesFlags.
;;
;; The only rule matches the `ProducesFlagsReturnsReg` variant.
(decl produces_flags_get_reg (ProducesFlags) Reg)
(rule (produces_flags_get_reg
       (ProducesFlags.ProducesFlagsReturnsReg _ result))
      result)
;; Turn a result-returning ProducesFlags into a side-effect-only one: the
;; instruction is kept and the result register is dropped.
(decl produces_flags_ignore (ProducesFlags) ProducesFlags)
(rule (produces_flags_ignore
       (ProducesFlags.ProducesFlagsReturnsReg flag_inst _))
      (ProducesFlags.ProducesFlagsSideEffect flag_inst))
(rule (produces_flags_ignore
       (ProducesFlags.ProducesFlagsReturnsResultWithConsumer flag_inst _))
      (ProducesFlags.ProducesFlagsSideEffect flag_inst))
;; Helper for combining two flags-consumer instructions that return a
;; single Reg, giving a ConsumesFlags that returns both values in a
;; ValueRegs.
@@ -440,12 +480,28 @@
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1
consumer_inst_2
consumer_result))
;; We must emit these instructions in order as the creator of
;; the ConsumesFlags may be relying on dataflow dependencies
;; amongst them.
(let ((_x Unit (emit producer_inst))
;; Note that the order of emission here is swapped, as this seems
;; to generate better register allocation for now with fewer
;; `mov` instructions.
(_y Unit (emit consumer_inst_2))
(_z Unit (emit consumer_inst_1)))
(_y Unit (emit consumer_inst_1))
(_z Unit (emit consumer_inst_2)))
consumer_result))
;; `with_flags` for a side-effect-only producer paired with a consumer made
;; of four instructions: emit the producer, then the four consumer
;; instructions, and return the consumer's `ValueRegs`.
(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect flag_setter)
                  (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs use_1
                                                                        use_2
                                                                        use_3
                                                                        use_4
                                                                        regs))
      ;; We must emit these instructions in order as the creator of
      ;; the ConsumesFlags may be relying on dataflow dependencies
      ;; amongst them.
      (let ((_a Unit (emit flag_setter))
            (_b Unit (emit use_1))
            (_c Unit (emit use_2))
            (_d Unit (emit use_3))
            (_e Unit (emit use_4)))
        regs))
(decl with_flags_reg (ProducesFlags ConsumesFlags) Reg)

View File

@@ -1184,7 +1184,7 @@ block0(v0: i128, v1: i8):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 10)
; (instruction range: 0 .. 13)
; Inst 0: lsl x4, x0, x2
; Inst 1: lsl x3, x1, x2
; Inst 2: orn w1, wzr, w2
@@ -1192,9 +1192,12 @@ block0(v0: i128, v1: i8):
; Inst 4: lsr x0, x0, x1
; Inst 5: orr x0, x3, x0
; Inst 6: ands xzr, x2, #64
; Inst 7: csel x1, x4, x0, ne
; Inst 8: csel x0, xzr, x4, ne
; Inst 9: ret
; Inst 7: csel x1, xzr, x4, ne
; Inst 8: csel x0, x4, x0, ne
; Inst 9: mov x2, x0
; Inst 10: mov x0, x1
; Inst 11: mov x1, x2
; Inst 12: ret
; }}
function %ishl_i128_i128(i128, i128) -> i128 {
@@ -1207,7 +1210,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 10)
; (instruction range: 0 .. 13)
; Inst 0: lsl x3, x0, x2
; Inst 1: lsl x1, x1, x2
; Inst 2: orn w4, wzr, w2
@@ -1215,9 +1218,12 @@ block0(v0: i128, v1: i128):
; Inst 4: lsr x0, x0, x4
; Inst 5: orr x0, x1, x0
; Inst 6: ands xzr, x2, #64
; Inst 7: csel x1, x3, x0, ne
; Inst 8: csel x0, xzr, x3, ne
; Inst 9: ret
; Inst 7: csel x1, xzr, x3, ne
; Inst 8: csel x0, x3, x0, ne
; Inst 9: mov x2, x0
; Inst 10: mov x0, x1
; Inst 11: mov x1, x2
; Inst 12: ret
; }}
function %ushr_i128_i8(i128, i8) -> i128 {
@@ -1230,17 +1236,20 @@ block0(v0: i128, v1: i8):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 10)
; (instruction range: 0 .. 13)
; Inst 0: lsr x3, x0, x2
; Inst 1: lsr x0, x1, x2
; Inst 2: orn w4, wzr, w2
; Inst 3: lsl x1, x1, #1
; Inst 4: lsl x1, x1, x4
; Inst 5: orr x3, x3, x1
; Inst 5: orr x1, x3, x1
; Inst 6: ands xzr, x2, #64
; Inst 7: csel x1, xzr, x0, ne
; Inst 8: csel x0, x0, x3, ne
; Inst 9: ret
; Inst 7: csel x1, x0, x1, ne
; Inst 8: csel x0, xzr, x0, ne
; Inst 9: mov x2, x0
; Inst 10: mov x0, x1
; Inst 11: mov x1, x2
; Inst 12: ret
; }}
function %ushr_i128_i128(i128, i128) -> i128 {
@@ -1253,17 +1262,20 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 10)
; (instruction range: 0 .. 13)
; Inst 0: lsr x3, x0, x2
; Inst 1: lsr x0, x1, x2
; Inst 2: orn w4, wzr, w2
; Inst 3: lsl x1, x1, #1
; Inst 4: lsl x1, x1, x4
; Inst 5: orr x3, x3, x1
; Inst 5: orr x1, x3, x1
; Inst 6: ands xzr, x2, #64
; Inst 7: csel x1, xzr, x0, ne
; Inst 8: csel x0, x0, x3, ne
; Inst 9: ret
; Inst 7: csel x1, x0, x1, ne
; Inst 8: csel x0, xzr, x0, ne
; Inst 9: mov x2, x0
; Inst 10: mov x0, x1
; Inst 11: mov x1, x2
; Inst 12: ret
; }}
function %sshr_i128_i8(i128, i8) -> i128 {
@@ -1276,7 +1288,7 @@ block0(v0: i128, v1: i8):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 11)
; (instruction range: 0 .. 13)
; Inst 0: lsr x3, x0, x2
; Inst 1: asr x0, x1, x2
; Inst 2: orn w4, wzr, w2
@@ -1285,9 +1297,11 @@ block0(v0: i128, v1: i8):
; Inst 5: asr x1, x1, #63
; Inst 6: orr x3, x3, x4
; Inst 7: ands xzr, x2, #64
; Inst 8: csel x1, x1, x0, ne
; Inst 9: csel x0, x0, x3, ne
; Inst 10: ret
; Inst 8: csel x2, x0, x3, ne
; Inst 9: csel x0, x1, x0, ne
; Inst 10: mov x1, x0
; Inst 11: mov x0, x2
; Inst 12: ret
; }}
function %sshr_i128_i128(i128, i128) -> i128 {
@@ -1300,7 +1314,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 11)
; (instruction range: 0 .. 13)
; Inst 0: lsr x3, x0, x2
; Inst 1: asr x0, x1, x2
; Inst 2: orn w4, wzr, w2
@@ -1309,8 +1323,10 @@ block0(v0: i128, v1: i128):
; Inst 5: asr x1, x1, #63
; Inst 6: orr x3, x3, x4
; Inst 7: ands xzr, x2, #64
; Inst 8: csel x1, x1, x0, ne
; Inst 9: csel x0, x0, x3, ne
; Inst 10: ret
; Inst 8: csel x2, x0, x3, ne
; Inst 9: csel x0, x1, x0, ne
; Inst 10: mov x1, x0
; Inst 11: mov x0, x2
; Inst 12: ret
; }}

View File

@@ -16,19 +16,19 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 24)
; (instruction range: 0 .. 25)
; Inst 0: mov x4, x1
; Inst 1: orr x1, xzr, #128
; Inst 2: sub x1, x1, x2
; Inst 3: lsr x3, x0, x2
; Inst 4: lsr x5, x4, x2
; Inst 3: lsr x5, x0, x2
; Inst 4: lsr x3, x4, x2
; Inst 5: orn w6, wzr, w2
; Inst 6: lsl x7, x4, #1
; Inst 7: lsl x6, x7, x6
; Inst 8: orr x6, x3, x6
; Inst 8: orr x5, x5, x6
; Inst 9: ands xzr, x2, #64
; Inst 10: csel x3, xzr, x5, ne
; Inst 11: csel x2, x5, x6, ne
; Inst 10: csel x2, x3, x5, ne
; Inst 11: csel x3, xzr, x3, ne
; Inst 12: lsl x5, x0, x1
; Inst 13: lsl x4, x4, x1
; Inst 14: orn w6, wzr, w1
@@ -36,11 +36,12 @@ block0(v0: i128, v1: i128):
; Inst 16: lsr x0, x0, x6
; Inst 17: orr x0, x4, x0
; Inst 18: ands xzr, x1, #64
; Inst 19: csel x1, x5, x0, ne
; Inst 20: csel x0, xzr, x5, ne
; Inst 21: orr x1, x3, x1
; Inst 22: orr x0, x2, x0
; Inst 23: ret
; Inst 19: csel x1, xzr, x5, ne
; Inst 20: csel x0, x5, x0, ne
; Inst 21: orr x3, x3, x0
; Inst 22: orr x0, x2, x1
; Inst 23: mov x1, x3
; Inst 24: ret
; }}
function %f0(i64, i64) -> i64 {
@@ -125,7 +126,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 27)
; (instruction range: 0 .. 24)
; Inst 0: mov x4, x0
; Inst 1: orr x0, xzr, #128
; Inst 2: sub x0, x0, x2
@@ -136,8 +137,8 @@ block0(v0: i128, v1: i128):
; Inst 7: lsr x6, x7, x6
; Inst 8: orr x5, x5, x6
; Inst 9: ands xzr, x2, #64
; Inst 10: csel x2, x3, x5, ne
; Inst 11: csel x3, xzr, x3, ne
; Inst 10: csel x2, xzr, x3, ne
; Inst 11: csel x3, x3, x5, ne
; Inst 12: lsr x5, x4, x0
; Inst 13: lsr x4, x1, x0
; Inst 14: orn w6, wzr, w0
@@ -145,14 +146,11 @@ block0(v0: i128, v1: i128):
; Inst 16: lsl x1, x1, x6
; Inst 17: orr x1, x5, x1
; Inst 18: ands xzr, x0, #64
; Inst 19: csel x0, xzr, x4, ne
; Inst 20: csel x1, x4, x1, ne
; Inst 21: orr x1, x3, x1
; Inst 22: orr x0, x2, x0
; Inst 23: mov x2, x0
; Inst 24: mov x0, x1
; Inst 25: mov x1, x2
; Inst 26: ret
; Inst 19: csel x0, x4, x1, ne
; Inst 20: csel x1, xzr, x4, ne
; Inst 21: orr x0, x2, x0
; Inst 22: orr x1, x3, x1
; Inst 23: ret
; }}
function %f4(i64, i64) -> i64 {

View File

@@ -43,7 +43,7 @@ block0(v0: f64, v1: i64):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 16)
; (instruction range: 0 .. 17)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movsd 0(%rdi), %xmm1
@@ -54,10 +54,12 @@ block0(v0: f64, v1: i64):
; Inst 7: andq $1, %rsi
; Inst 8: ucomisd %xmm0, %xmm1
; Inst 9: movaps %xmm0, %xmm1
; Inst 10: jz $check; movsd %xmm0, %xmm1; $check: jnp $next; movsd %xmm0, %xmm1; $next
; Inst 11: movq %rsi, %rax
; Inst 12: movaps %xmm1, %xmm0
; Inst 13: movq %rbp, %rsp
; Inst 14: popq %rbp
; Inst 15: ret
; Inst 10: jz $next; movsd %xmm0, %xmm1; $next:
; Inst 11: jnp $next; movsd %xmm0, %xmm1; $next:
; Inst 12: movq %rsi, %rax
; Inst 13: movaps %xmm1, %xmm0
; Inst 14: movq %rbp, %rsp
; Inst 15: popq %rbp
; Inst 16: ret
; }}

View File

@@ -600,57 +600,55 @@ block0(v0: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 50)
; (instruction range: 0 .. 48)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rsi, %rdx
; Inst 3: movq %rdi, %rsi
; Inst 4: shrq $1, %rsi
; Inst 5: movabsq $8608480567731124087, %rcx
; Inst 6: andq %rcx, %rsi
; Inst 7: movq %rdi, %rax
; Inst 8: subq %rsi, %rax
; Inst 9: shrq $1, %rsi
; Inst 10: andq %rcx, %rsi
; Inst 11: subq %rsi, %rax
; Inst 12: shrq $1, %rsi
; Inst 13: andq %rcx, %rsi
; Inst 14: subq %rsi, %rax
; Inst 15: movq %rax, %rsi
; Inst 16: shrq $4, %rsi
; Inst 17: addq %rax, %rsi
; Inst 18: movabsq $1085102592571150095, %rdi
; Inst 19: andq %rdi, %rsi
; Inst 20: movabsq $72340172838076673, %rdi
; Inst 21: imulq %rdi, %rsi
; Inst 22: shrq $56, %rsi
; Inst 23: movq %rdx, %rax
; Inst 24: shrq $1, %rax
; Inst 25: movabsq $8608480567731124087, %rcx
; Inst 26: andq %rcx, %rax
; Inst 27: movq %rdx, %rdi
; Inst 28: subq %rax, %rdi
; Inst 29: shrq $1, %rax
; Inst 30: andq %rcx, %rax
; Inst 31: subq %rax, %rdi
; Inst 32: shrq $1, %rax
; Inst 33: andq %rcx, %rax
; Inst 34: subq %rax, %rdi
; Inst 35: movq %rdi, %rax
; Inst 36: shrq $4, %rax
; Inst 37: addq %rdi, %rax
; Inst 38: movabsq $1085102592571150095, %rdi
; Inst 39: andq %rdi, %rax
; Inst 40: movabsq $72340172838076673, %rdi
; Inst 41: imulq %rdi, %rax
; Inst 42: shrq $56, %rax
; Inst 43: addq %rax, %rsi
; Inst 44: xorq %rdi, %rdi
; Inst 45: movq %rsi, %rax
; Inst 46: movq %rdi, %rdx
; Inst 47: movq %rbp, %rsp
; Inst 48: popq %rbp
; Inst 49: ret
; Inst 2: movq %rdi, %rax
; Inst 3: movq %rax, %rcx
; Inst 4: shrq $1, %rcx
; Inst 5: movabsq $8608480567731124087, %rdi
; Inst 6: andq %rdi, %rcx
; Inst 7: subq %rcx, %rax
; Inst 8: shrq $1, %rcx
; Inst 9: andq %rdi, %rcx
; Inst 10: subq %rcx, %rax
; Inst 11: shrq $1, %rcx
; Inst 12: andq %rdi, %rcx
; Inst 13: subq %rcx, %rax
; Inst 14: movq %rax, %rdi
; Inst 15: shrq $4, %rdi
; Inst 16: addq %rax, %rdi
; Inst 17: movabsq $1085102592571150095, %rax
; Inst 18: andq %rax, %rdi
; Inst 19: movabsq $72340172838076673, %rax
; Inst 20: imulq %rax, %rdi
; Inst 21: shrq $56, %rdi
; Inst 22: movq %rsi, %rcx
; Inst 23: shrq $1, %rcx
; Inst 24: movabsq $8608480567731124087, %rax
; Inst 25: andq %rax, %rcx
; Inst 26: subq %rcx, %rsi
; Inst 27: shrq $1, %rcx
; Inst 28: andq %rax, %rcx
; Inst 29: subq %rcx, %rsi
; Inst 30: shrq $1, %rcx
; Inst 31: andq %rax, %rcx
; Inst 32: subq %rcx, %rsi
; Inst 33: movq %rsi, %rax
; Inst 34: shrq $4, %rax
; Inst 35: addq %rsi, %rax
; Inst 36: movabsq $1085102592571150095, %rsi
; Inst 37: andq %rsi, %rax
; Inst 38: movabsq $72340172838076673, %rsi
; Inst 39: imulq %rsi, %rax
; Inst 40: shrq $56, %rax
; Inst 41: addq %rax, %rdi
; Inst 42: xorq %rsi, %rsi
; Inst 43: movq %rdi, %rax
; Inst 44: movq %rsi, %rdx
; Inst 45: movq %rbp, %rsp
; Inst 46: popq %rbp
; Inst 47: ret
; }}
function %f20(i128) -> i128 {
@@ -663,108 +661,97 @@ block0(v0: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 101)
; (instruction range: 0 .. 90)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rcx
; Inst 3: movq %rcx, %rdi
; Inst 4: movabsq $6148914691236517205, %rax
; Inst 5: shrq $1, %rdi
; Inst 6: andq %rax, %rdi
; Inst 7: andq %rcx, %rax
; Inst 2: movq %rsi, %rcx
; Inst 3: movabsq $6148914691236517205, %rsi
; Inst 4: movq %rcx, %rax
; Inst 5: andq %rsi, %rax
; Inst 6: shrq $1, %rcx
; Inst 7: andq %rsi, %rcx
; Inst 8: shlq $1, %rax
; Inst 9: movq %rax, %rcx
; Inst 10: orq %rdi, %rcx
; Inst 11: movq %rcx, %rdi
; Inst 12: movabsq $3689348814741910323, %rax
; Inst 13: shrq $2, %rdi
; Inst 14: andq %rax, %rdi
; Inst 15: andq %rcx, %rax
; Inst 16: shlq $2, %rax
; Inst 17: movq %rax, %rcx
; Inst 18: orq %rdi, %rcx
; Inst 19: movq %rcx, %rdi
; Inst 20: movabsq $1085102592571150095, %rax
; Inst 21: shrq $4, %rdi
; Inst 22: andq %rax, %rdi
; Inst 23: andq %rcx, %rax
; Inst 24: shlq $4, %rax
; Inst 9: orq %rcx, %rax
; Inst 10: movabsq $3689348814741910323, %rsi
; Inst 11: movq %rax, %rcx
; Inst 12: andq %rsi, %rcx
; Inst 13: shrq $2, %rax
; Inst 14: andq %rsi, %rax
; Inst 15: shlq $2, %rcx
; Inst 16: orq %rax, %rcx
; Inst 17: movabsq $1085102592571150095, %rsi
; Inst 18: movq %rcx, %rax
; Inst 19: andq %rsi, %rax
; Inst 20: shrq $4, %rcx
; Inst 21: andq %rsi, %rcx
; Inst 22: shlq $4, %rax
; Inst 23: orq %rcx, %rax
; Inst 24: movabsq $71777214294589695, %rsi
; Inst 25: movq %rax, %rcx
; Inst 26: orq %rdi, %rcx
; Inst 27: movq %rcx, %rdi
; Inst 28: movabsq $71777214294589695, %rax
; Inst 29: shrq $8, %rdi
; Inst 30: andq %rax, %rdi
; Inst 31: andq %rcx, %rax
; Inst 32: shlq $8, %rax
; Inst 33: movq %rax, %rcx
; Inst 34: orq %rdi, %rcx
; Inst 35: movq %rcx, %rdi
; Inst 36: movabsq $281470681808895, %rax
; Inst 37: shrq $16, %rdi
; Inst 38: andq %rax, %rdi
; Inst 39: andq %rcx, %rax
; Inst 40: shlq $16, %rax
; Inst 41: orq %rdi, %rax
; Inst 42: movq %rax, %rcx
; Inst 43: movl $-1, %edi
; Inst 44: shrq $32, %rcx
; Inst 45: andq %rdi, %rcx
; Inst 46: andq %rax, %rdi
; Inst 47: shlq $32, %rdi
; Inst 48: orq %rcx, %rdi
; Inst 49: movq %rsi, %rcx
; Inst 50: movq %rcx, %rsi
; Inst 51: movabsq $6148914691236517205, %rax
; Inst 52: shrq $1, %rsi
; Inst 53: andq %rax, %rsi
; Inst 54: andq %rcx, %rax
; Inst 55: shlq $1, %rax
; Inst 56: movq %rax, %rcx
; Inst 57: orq %rsi, %rcx
; Inst 58: movq %rcx, %rsi
; Inst 59: movabsq $3689348814741910323, %rax
; Inst 60: shrq $2, %rsi
; Inst 61: andq %rax, %rsi
; Inst 62: andq %rcx, %rax
; Inst 63: shlq $2, %rax
; Inst 64: movq %rax, %rcx
; Inst 65: orq %rsi, %rcx
; Inst 66: movq %rcx, %rsi
; Inst 67: movabsq $1085102592571150095, %rax
; Inst 68: shrq $4, %rsi
; Inst 69: andq %rax, %rsi
; Inst 70: andq %rcx, %rax
; Inst 71: shlq $4, %rax
; Inst 72: movq %rax, %rcx
; Inst 73: orq %rsi, %rcx
; Inst 74: movq %rcx, %rsi
; Inst 75: movabsq $71777214294589695, %rax
; Inst 76: shrq $8, %rsi
; Inst 77: andq %rax, %rsi
; Inst 78: andq %rcx, %rax
; Inst 79: shlq $8, %rax
; Inst 80: movq %rax, %rcx
; Inst 81: orq %rsi, %rcx
; Inst 82: movq %rcx, %rsi
; Inst 83: movabsq $281470681808895, %rax
; Inst 84: shrq $16, %rsi
; Inst 85: andq %rax, %rsi
; Inst 86: andq %rcx, %rax
; Inst 87: shlq $16, %rax
; Inst 88: orq %rsi, %rax
; Inst 89: movq %rax, %rsi
; Inst 90: movl $-1, %ecx
; Inst 91: shrq $32, %rsi
; Inst 92: andq %rcx, %rsi
; Inst 93: andq %rax, %rcx
; Inst 94: shlq $32, %rcx
; Inst 95: orq %rsi, %rcx
; Inst 96: movq %rcx, %rax
; Inst 97: movq %rdi, %rdx
; Inst 98: movq %rbp, %rsp
; Inst 99: popq %rbp
; Inst 100: ret
; Inst 26: andq %rsi, %rcx
; Inst 27: shrq $8, %rax
; Inst 28: andq %rsi, %rax
; Inst 29: shlq $8, %rcx
; Inst 30: orq %rax, %rcx
; Inst 31: movabsq $281470681808895, %rsi
; Inst 32: movq %rcx, %rax
; Inst 33: andq %rsi, %rax
; Inst 34: shrq $16, %rcx
; Inst 35: andq %rsi, %rcx
; Inst 36: shlq $16, %rax
; Inst 37: orq %rcx, %rax
; Inst 38: movabsq $4294967295, %rcx
; Inst 39: movq %rax, %rsi
; Inst 40: andq %rcx, %rsi
; Inst 41: shrq $32, %rax
; Inst 42: shlq $32, %rsi
; Inst 43: orq %rax, %rsi
; Inst 44: movabsq $6148914691236517205, %rax
; Inst 45: movq %rdi, %rcx
; Inst 46: andq %rax, %rcx
; Inst 47: shrq $1, %rdi
; Inst 48: andq %rax, %rdi
; Inst 49: shlq $1, %rcx
; Inst 50: orq %rdi, %rcx
; Inst 51: movabsq $3689348814741910323, %rdi
; Inst 52: movq %rcx, %rax
; Inst 53: andq %rdi, %rax
; Inst 54: shrq $2, %rcx
; Inst 55: andq %rdi, %rcx
; Inst 56: shlq $2, %rax
; Inst 57: orq %rcx, %rax
; Inst 58: movabsq $1085102592571150095, %rdi
; Inst 59: movq %rax, %rcx
; Inst 60: andq %rdi, %rcx
; Inst 61: shrq $4, %rax
; Inst 62: andq %rdi, %rax
; Inst 63: shlq $4, %rcx
; Inst 64: orq %rax, %rcx
; Inst 65: movabsq $71777214294589695, %rdi
; Inst 66: movq %rcx, %rax
; Inst 67: andq %rdi, %rax
; Inst 68: shrq $8, %rcx
; Inst 69: andq %rdi, %rcx
; Inst 70: shlq $8, %rax
; Inst 71: orq %rcx, %rax
; Inst 72: movabsq $281470681808895, %rdi
; Inst 73: movq %rax, %rcx
; Inst 74: andq %rdi, %rcx
; Inst 75: shrq $16, %rax
; Inst 76: andq %rdi, %rax
; Inst 77: shlq $16, %rcx
; Inst 78: orq %rax, %rcx
; Inst 79: movabsq $4294967295, %rax
; Inst 80: movq %rcx, %rdi
; Inst 81: andq %rax, %rdi
; Inst 82: shrq $32, %rcx
; Inst 83: shlq $32, %rdi
; Inst 84: orq %rcx, %rdi
; Inst 85: movq %rsi, %rax
; Inst 86: movq %rdi, %rdx
; Inst 87: movq %rbp, %rsp
; Inst 88: popq %rbp
; Inst 89: ret
; }}
function %f21(i128, i64) {
@@ -1020,11 +1007,11 @@ block0(v0: i128):
; Inst 4: cmovzq %rcx, %rax
; Inst 5: movl $63, %esi
; Inst 6: subq %rax, %rsi
; Inst 7: movabsq $-1, %rcx
; Inst 8: bsrq %rdi, %rax
; Inst 9: cmovzq %rcx, %rax
; Inst 7: movabsq $-1, %rax
; Inst 8: bsrq %rdi, %rcx
; Inst 9: cmovzq %rax, %rcx
; Inst 10: movl $63, %edi
; Inst 11: subq %rax, %rdi
; Inst 11: subq %rcx, %rdi
; Inst 12: addq $64, %rdi
; Inst 13: cmpq $64, %rsi
; Inst 14: cmovnzq %rsi, %rdi
@@ -1098,7 +1085,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 25)
; (instruction range: 0 .. 24)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rax
@@ -1116,14 +1103,13 @@ block0(v0: i128, v1: i128):
; Inst 14: cmovzq %rcx, %rax
; Inst 15: orq %rdi, %rax
; Inst 16: testq $64, %rdx
; Inst 17: movq %rsi, %rdi
; Inst 18: cmovzq %rax, %rdi
; Inst 19: cmovzq %rsi, %rcx
; Inst 20: movq %rcx, %rax
; Inst 21: movq %rdi, %rdx
; Inst 22: movq %rbp, %rsp
; Inst 23: popq %rbp
; Inst 24: ret
; Inst 17: cmovzq %rsi, %rcx
; Inst 18: cmovzq %rax, %rsi
; Inst 19: movq %rcx, %rax
; Inst 20: movq %rsi, %rdx
; Inst 21: movq %rbp, %rsp
; Inst 22: popq %rbp
; Inst 23: ret
; }}
function %f31(i128, i128) -> i128 {
@@ -1136,7 +1122,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 24)
; (instruction range: 0 .. 25)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rsi, %rax
@@ -1152,15 +1138,16 @@ block0(v0: i128, v1: i128):
; Inst 12: testq $127, %rdx
; Inst 13: cmovzq %rcx, %rax
; Inst 14: orq %rdi, %rax
; Inst 15: xorq %rdi, %rdi
; Inst 15: xorq %rcx, %rcx
; Inst 16: testq $64, %rdx
; Inst 17: cmovzq %rsi, %rdi
; Inst 18: cmovzq %rax, %rsi
; Inst 19: movq %rsi, %rax
; Inst 20: movq %rdi, %rdx
; Inst 21: movq %rbp, %rsp
; Inst 22: popq %rbp
; Inst 23: ret
; Inst 17: movq %rsi, %rdi
; Inst 18: cmovzq %rax, %rdi
; Inst 19: cmovzq %rsi, %rcx
; Inst 20: movq %rdi, %rax
; Inst 21: movq %rcx, %rdx
; Inst 22: movq %rbp, %rsp
; Inst 23: popq %rbp
; Inst 24: ret
; }}
function %f32(i128, i128) -> i128 {
@@ -1173,7 +1160,7 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 25)
; (instruction range: 0 .. 26)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rax
@@ -1192,13 +1179,14 @@ block0(v0: i128, v1: i128):
; Inst 15: orq %r8, %rax
; Inst 16: sarq $63, %rsi
; Inst 17: testq $64, %rdx
; Inst 18: cmovzq %rdi, %rsi
; Inst 19: cmovzq %rax, %rdi
; Inst 20: movq %rdi, %rax
; Inst 21: movq %rsi, %rdx
; Inst 22: movq %rbp, %rsp
; Inst 23: popq %rbp
; Inst 24: ret
; Inst 18: movq %rdi, %rcx
; Inst 19: cmovzq %rax, %rcx
; Inst 20: cmovzq %rdi, %rsi
; Inst 21: movq %rcx, %rax
; Inst 22: movq %rsi, %rdx
; Inst 23: movq %rbp, %rsp
; Inst 24: popq %rbp
; Inst 25: ret
; }}
function %f33(i128, i128) -> i128 {
@@ -1211,27 +1199,27 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 46)
; (instruction range: 0 .. 48)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %r9
; Inst 2: movq %rdi, %rax
; Inst 3: movq %rdx, %rcx
; Inst 4: shlq %cl, %r9
; Inst 5: movq %rsi, %rax
; Inst 4: shlq %cl, %rax
; Inst 5: movq %rsi, %r8
; Inst 6: movq %rdx, %rcx
; Inst 7: shlq %cl, %rax
; Inst 7: shlq %cl, %r8
; Inst 8: movl $64, %ecx
; Inst 9: subq %rdx, %rcx
; Inst 10: movq %rdi, %r10
; Inst 11: shrq %cl, %r10
; Inst 12: xorq %r8, %r8
; Inst 10: movq %rdi, %r9
; Inst 11: shrq %cl, %r9
; Inst 12: xorq %rcx, %rcx
; Inst 13: testq $127, %rdx
; Inst 14: cmovzq %r8, %r10
; Inst 15: orq %rax, %r10
; Inst 14: cmovzq %rcx, %r9
; Inst 15: orq %r8, %r9
; Inst 16: testq $64, %rdx
; Inst 17: movq %r9, %rax
; Inst 18: cmovzq %r10, %rax
; Inst 19: cmovzq %r9, %r8
; Inst 17: movq %rcx, %r8
; Inst 18: cmovzq %rax, %r8
; Inst 19: cmovzq %r9, %rax
; Inst 20: movl $128, %r9d
; Inst 21: subq %rdx, %r9
; Inst 22: movq %rdi, %rdx
@@ -1247,17 +1235,19 @@ block0(v0: i128, v1: i128):
; Inst 32: testq $127, %r9
; Inst 33: cmovzq %rcx, %rsi
; Inst 34: orq %rdx, %rsi
; Inst 35: xorq %rcx, %rcx
; Inst 35: xorq %rdx, %rdx
; Inst 36: testq $64, %r9
; Inst 37: cmovzq %rdi, %rcx
; Inst 38: cmovzq %rsi, %rdi
; Inst 39: orq %rdi, %r8
; Inst 40: orq %rcx, %rax
; Inst 41: movq %rax, %rdx
; Inst 42: movq %r8, %rax
; Inst 43: movq %rbp, %rsp
; Inst 44: popq %rbp
; Inst 45: ret
; Inst 37: movq %rdi, %rcx
; Inst 38: cmovzq %rsi, %rcx
; Inst 39: movq %rdx, %rsi
; Inst 40: cmovzq %rdi, %rsi
; Inst 41: orq %rcx, %r8
; Inst 42: orq %rsi, %rax
; Inst 43: movq %rax, %rdx
; Inst 44: movq %r8, %rax
; Inst 45: movq %rbp, %rsp
; Inst 46: popq %rbp
; Inst 47: ret
; }}
function %f34(i128, i128) -> i128 {
@@ -1270,52 +1260,51 @@ block0(v0: i128, v1: i128):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 46)
; (instruction range: 0 .. 45)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rax
; Inst 3: movq %rdx, %rcx
; Inst 4: shrq %cl, %rax
; Inst 5: movq %rsi, %r8
; Inst 5: movq %rsi, %r9
; Inst 6: movq %rdx, %rcx
; Inst 7: shrq %cl, %r8
; Inst 7: shrq %cl, %r9
; Inst 8: movl $64, %ecx
; Inst 9: subq %rdx, %rcx
; Inst 10: movq %rsi, %r9
; Inst 11: shlq %cl, %r9
; Inst 10: movq %rsi, %r8
; Inst 11: shlq %cl, %r8
; Inst 12: xorq %rcx, %rcx
; Inst 13: testq $127, %rdx
; Inst 14: cmovzq %rcx, %r9
; Inst 15: movq %r9, %rcx
; Inst 16: orq %rax, %rcx
; Inst 17: xorq %rax, %rax
; Inst 18: testq $64, %rdx
; Inst 14: cmovzq %rcx, %r8
; Inst 15: orq %rax, %r8
; Inst 16: xorq %rcx, %rcx
; Inst 17: testq $64, %rdx
; Inst 18: movq %r9, %rax
; Inst 19: cmovzq %r8, %rax
; Inst 20: cmovzq %rcx, %r8
; Inst 21: movl $128, %r9d
; Inst 22: subq %rdx, %r9
; Inst 23: movq %rdi, %rdx
; Inst 24: movq %r9, %rcx
; Inst 25: shlq %cl, %rdx
; Inst 26: movq %r9, %rcx
; Inst 27: shlq %cl, %rsi
; Inst 28: movl $64, %ecx
; Inst 29: subq %r9, %rcx
; Inst 30: shrq %cl, %rdi
; Inst 31: xorq %rcx, %rcx
; Inst 32: testq $127, %r9
; Inst 33: cmovzq %rcx, %rdi
; Inst 34: orq %rsi, %rdi
; Inst 35: testq $64, %r9
; Inst 36: movq %rdx, %rsi
; Inst 37: cmovzq %rdi, %rsi
; Inst 38: cmovzq %rdx, %rcx
; Inst 39: orq %rcx, %r8
; Inst 40: orq %rsi, %rax
; Inst 41: movq %rax, %rdx
; Inst 42: movq %r8, %rax
; Inst 43: movq %rbp, %rsp
; Inst 44: popq %rbp
; Inst 45: ret
; Inst 20: movq %rcx, %r8
; Inst 21: cmovzq %r9, %r8
; Inst 22: movl $128, %r9d
; Inst 23: subq %rdx, %r9
; Inst 24: movq %rdi, %rdx
; Inst 25: movq %r9, %rcx
; Inst 26: shlq %cl, %rdx
; Inst 27: movq %r9, %rcx
; Inst 28: shlq %cl, %rsi
; Inst 29: movl $64, %ecx
; Inst 30: subq %r9, %rcx
; Inst 31: shrq %cl, %rdi
; Inst 32: xorq %rcx, %rcx
; Inst 33: testq $127, %r9
; Inst 34: cmovzq %rcx, %rdi
; Inst 35: orq %rsi, %rdi
; Inst 36: testq $64, %r9
; Inst 37: cmovzq %rdx, %rcx
; Inst 38: cmovzq %rdi, %rdx
; Inst 39: orq %rcx, %rax
; Inst 40: orq %rdx, %r8
; Inst 41: movq %r8, %rdx
; Inst 42: movq %rbp, %rsp
; Inst 43: popq %rbp
; Inst 44: ret
; }}

View File

@@ -14,17 +14,17 @@ block0(v0: i64):
; (instruction range: 0 .. 25)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rsi
; Inst 3: shrq $1, %rsi
; Inst 4: movabsq $8608480567731124087, %rax
; Inst 5: andq %rax, %rsi
; Inst 6: subq %rsi, %rdi
; Inst 7: shrq $1, %rsi
; Inst 8: andq %rax, %rsi
; Inst 9: subq %rsi, %rdi
; Inst 10: shrq $1, %rsi
; Inst 11: andq %rax, %rsi
; Inst 12: subq %rsi, %rdi
; Inst 2: movq %rdi, %rax
; Inst 3: shrq $1, %rax
; Inst 4: movabsq $8608480567731124087, %rsi
; Inst 5: andq %rsi, %rax
; Inst 6: subq %rax, %rdi
; Inst 7: shrq $1, %rax
; Inst 8: andq %rsi, %rax
; Inst 9: subq %rax, %rdi
; Inst 10: shrq $1, %rax
; Inst 11: andq %rsi, %rax
; Inst 12: subq %rax, %rdi
; Inst 13: movq %rdi, %rsi
; Inst 14: shrq $4, %rsi
; Inst 15: addq %rdi, %rsi
@@ -54,17 +54,17 @@ block0(v0: i64):
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq 0(%rdi), %rdi
; Inst 3: movq %rdi, %rsi
; Inst 4: shrq $1, %rsi
; Inst 5: movabsq $8608480567731124087, %rax
; Inst 6: andq %rax, %rsi
; Inst 7: subq %rsi, %rdi
; Inst 8: shrq $1, %rsi
; Inst 9: andq %rax, %rsi
; Inst 10: subq %rsi, %rdi
; Inst 11: shrq $1, %rsi
; Inst 12: andq %rax, %rsi
; Inst 13: subq %rsi, %rdi
; Inst 3: movq %rdi, %rax
; Inst 4: shrq $1, %rax
; Inst 5: movabsq $8608480567731124087, %rsi
; Inst 6: andq %rsi, %rax
; Inst 7: subq %rax, %rdi
; Inst 8: shrq $1, %rax
; Inst 9: andq %rsi, %rax
; Inst 10: subq %rax, %rdi
; Inst 11: shrq $1, %rax
; Inst 12: andq %rsi, %rax
; Inst 13: subq %rax, %rdi
; Inst 14: movq %rdi, %rsi
; Inst 15: shrq $4, %rsi
; Inst 16: addq %rdi, %rsi
@@ -89,29 +89,30 @@ block0(v0: i32):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 22)
; (instruction range: 0 .. 23)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movq %rdi, %rsi
; Inst 3: shrl $1, %esi
; Inst 4: andl $2004318071, %esi
; Inst 5: subl %esi, %edi
; Inst 6: shrl $1, %esi
; Inst 7: andl $2004318071, %esi
; Inst 8: subl %esi, %edi
; Inst 9: shrl $1, %esi
; Inst 10: andl $2004318071, %esi
; Inst 11: subl %esi, %edi
; Inst 12: movq %rdi, %rsi
; Inst 13: shrl $4, %esi
; Inst 14: addl %edi, %esi
; Inst 15: andl $252645135, %esi
; Inst 16: imull $16843009, %esi
; Inst 17: shrl $24, %esi
; Inst 18: movq %rsi, %rax
; Inst 19: movq %rbp, %rsp
; Inst 20: popq %rbp
; Inst 21: ret
; Inst 2: movq %rdi, %rax
; Inst 3: shrl $1, %eax
; Inst 4: movl $2004318071, %esi
; Inst 5: andl %esi, %eax
; Inst 6: subl %eax, %edi
; Inst 7: shrl $1, %eax
; Inst 8: andl %esi, %eax
; Inst 9: subl %eax, %edi
; Inst 10: shrl $1, %eax
; Inst 11: andl %esi, %eax
; Inst 12: subl %eax, %edi
; Inst 13: movq %rdi, %rsi
; Inst 14: shrl $4, %esi
; Inst 15: addl %edi, %esi
; Inst 16: andl $252645135, %esi
; Inst 17: imull $16843009, %esi
; Inst 18: shrl $24, %esi
; Inst 19: movq %rsi, %rax
; Inst 20: movq %rbp, %rsp
; Inst 21: popq %rbp
; Inst 22: ret
; }}
function %popcnt32load(i64) -> i32 {
@@ -125,29 +126,30 @@ block0(v0: i64):
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 23)
; (instruction range: 0 .. 24)
; Inst 0: pushq %rbp
; Inst 1: movq %rsp, %rbp
; Inst 2: movl 0(%rdi), %edi
; Inst 3: movq %rdi, %rsi
; Inst 4: shrl $1, %esi
; Inst 5: andl $2004318071, %esi
; Inst 6: subl %esi, %edi
; Inst 7: shrl $1, %esi
; Inst 8: andl $2004318071, %esi
; Inst 9: subl %esi, %edi
; Inst 10: shrl $1, %esi
; Inst 11: andl $2004318071, %esi
; Inst 12: subl %esi, %edi
; Inst 13: movq %rdi, %rsi
; Inst 14: shrl $4, %esi
; Inst 15: addl %edi, %esi
; Inst 16: andl $252645135, %esi
; Inst 17: imull $16843009, %esi
; Inst 18: shrl $24, %esi
; Inst 19: movq %rsi, %rax
; Inst 20: movq %rbp, %rsp
; Inst 21: popq %rbp
; Inst 22: ret
; Inst 3: movq %rdi, %rax
; Inst 4: shrl $1, %eax
; Inst 5: movl $2004318071, %esi
; Inst 6: andl %esi, %eax
; Inst 7: subl %eax, %edi
; Inst 8: shrl $1, %eax
; Inst 9: andl %esi, %eax
; Inst 10: subl %eax, %edi
; Inst 11: shrl $1, %eax
; Inst 12: andl %esi, %eax
; Inst 13: subl %eax, %edi
; Inst 14: movq %rdi, %rsi
; Inst 15: shrl $4, %esi
; Inst 16: addl %edi, %esi
; Inst 17: andl $252645135, %esi
; Inst 18: imull $16843009, %esi
; Inst 19: shrl $24, %esi
; Inst 20: movq %rsi, %rax
; Inst 21: movq %rbp, %rsp
; Inst 22: popq %rbp
; Inst 23: ret
; }}