From 24f145cd1e27a346b64f3d66db63a582435bcd5f Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 28 Feb 2022 09:45:13 -0800 Subject: [PATCH] Migrate clz, ctz, popcnt, bitrev, is_null, is_invalid on x64 to ISLE. (#3848) --- .../lower/isle/generated_code.manifest | 2 +- .../isa/aarch64/lower/isle/generated_code.rs | 101 +- .../s390x/lower/isle/generated_code.manifest | 2 +- .../isa/s390x/lower/isle/generated_code.rs | 101 +- cranelift/codegen/src/isa/x64/inst.isle | 185 +- cranelift/codegen/src/isa/x64/inst/emit.rs | 60 - cranelift/codegen/src/isa/x64/inst/mod.rs | 83 +- cranelift/codegen/src/isa/x64/lower.isle | 357 ++- cranelift/codegen/src/isa/x64/lower.rs | 1021 +-------- cranelift/codegen/src/isa/x64/lower/isle.rs | 58 + .../x64/lower/isle/generated_code.manifest | 6 +- .../src/isa/x64/lower/isle/generated_code.rs | 2004 +++++++++++++---- cranelift/codegen/src/machinst/isle.rs | 37 + cranelift/codegen/src/prelude.isle | 68 +- .../filetests/isa/aarch64/bitops.clif | 68 +- .../filetests/isa/aarch64/shift-rotate.clif | 42 +- .../filetests/isa/x64/cmp-mem-bug.clif | 16 +- .../filetests/filetests/isa/x64/i128.clif | 461 ++-- .../filetests/filetests/isa/x64/popcnt.clif | 130 +- 19 files changed, 2812 insertions(+), 1990 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest index 30cc41e9c4..78ce678fcc 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 9ea75a6f790b5c03 -src/prelude.isle 9830498351ddf6a3 +src/prelude.isle 6b0160bfcac86902 src/isa/aarch64/inst.isle 3678d0a37bdb4cff src/isa/aarch64/lower.isle 90accbfcadaea46d diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs index 33163212d3..85d9957a9a 100644 --- 
a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs @@ -39,8 +39,14 @@ pub trait Context { fn u8_as_u64(&mut self, arg0: u8) -> u64; fn u16_as_u64(&mut self, arg0: u16) -> u64; fn u32_as_u64(&mut self, arg0: u32) -> u64; + fn i64_as_u64(&mut self, arg0: i64) -> u64; + fn u64_add(&mut self, arg0: u64, arg1: u64) -> u64; + fn u64_sub(&mut self, arg0: u64, arg1: u64) -> u64; + fn u64_and(&mut self, arg0: u64, arg1: u64) -> u64; fn ty_bits(&mut self, arg0: Type) -> u8; fn ty_bits_u16(&mut self, arg0: Type) -> u16; + fn ty_bits_u64(&mut self, arg0: Type) -> u64; + fn ty_mask(&mut self, arg0: Type) -> u64; fn ty_bytes(&mut self, arg0: Type) -> u16; fn lane_type(&mut self, arg0: Type) -> Type; fn fits_in_16(&mut self, arg0: Type) -> Option; @@ -110,13 +116,13 @@ pub trait Context { fn rotr_opposite_amount(&mut self, arg0: Type, arg1: ImmShift) -> ImmShift; } -/// Internal type SideEffectNoResult: defined at src/prelude.isle line 363. +/// Internal type SideEffectNoResult: defined at src/prelude.isle line 385. #[derive(Clone, Debug)] pub enum SideEffectNoResult { Inst { inst: MInst }, } -/// Internal type ProducesFlags: defined at src/prelude.isle line 385. +/// Internal type ProducesFlags: defined at src/prelude.isle line 407. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlagsSideEffect { inst: MInst }, @@ -124,7 +130,7 @@ pub enum ProducesFlags { ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 396. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 418. 
#[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlagsReturnsResultWithProducer { @@ -140,6 +146,13 @@ pub enum ConsumesFlags { inst2: MInst, result: ValueRegs, }, + ConsumesFlagsFourTimesReturnsValueRegs { + inst1: MInst, + inst2: MInst, + inst3: MInst, + inst4: MInst, + result: ValueRegs, + }, } /// Internal type MInst: defined at src/isa/aarch64/inst.isle line 2. @@ -1050,7 +1063,7 @@ pub fn constructor_side_effect( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 368. + // Rule at src/prelude.isle line 390. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -1068,7 +1081,7 @@ pub fn constructor_safepoint( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 374. + // Rule at src/prelude.isle line 396. let expr0_0 = C::emit_safepoint(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -1076,6 +1089,55 @@ pub fn constructor_safepoint( return None; } +// Generated as internal constructor for term produces_flags_get_reg. +pub fn constructor_produces_flags_get_reg( + ctx: &mut C, + arg0: &ProducesFlags, +) -> Option { + let pattern0_0 = arg0; + if let &ProducesFlags::ProducesFlagsReturnsReg { + inst: ref pattern1_0, + result: pattern1_1, + } = pattern0_0 + { + // Rule at src/prelude.isle line 434. + return Some(pattern1_1); + } + return None; +} + +// Generated as internal constructor for term produces_flags_ignore. +pub fn constructor_produces_flags_ignore( + ctx: &mut C, + arg0: &ProducesFlags, +) -> Option { + let pattern0_0 = arg0; + match pattern0_0 { + &ProducesFlags::ProducesFlagsReturnsReg { + inst: ref pattern1_0, + result: pattern1_1, + } => { + // Rule at src/prelude.isle line 439. 
+ let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { + inst: pattern1_0.clone(), + }; + return Some(expr0_0); + } + &ProducesFlags::ProducesFlagsReturnsResultWithConsumer { + inst: ref pattern1_0, + result: pattern1_1, + } => { + // Rule at src/prelude.isle line 441. + let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { + inst: pattern1_0.clone(), + }; + return Some(expr0_0); + } + _ => {} + } + return None; +} + // Generated as internal constructor for term consumes_flags_concat. pub fn constructor_consumes_flags_concat( ctx: &mut C, @@ -1094,7 +1156,7 @@ pub fn constructor_consumes_flags_concat( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 408. + // Rule at src/prelude.isle line 448. let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { inst1: pattern1_0.clone(), @@ -1124,7 +1186,7 @@ pub fn constructor_with_flags( inst: ref pattern3_0, result: pattern3_1, } => { - // Rule at src/prelude.isle line 433. + // Rule at src/prelude.isle line 473. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_reg(ctx, pattern3_1); @@ -1135,12 +1197,27 @@ pub fn constructor_with_flags( inst2: ref pattern3_1, result: pattern3_2, } => { - // Rule at src/prelude.isle line 439. + // Rule at src/prelude.isle line 479. let expr0_0 = C::emit(ctx, pattern1_0); - let expr1_0 = C::emit(ctx, pattern3_1); - let expr2_0 = C::emit(ctx, pattern3_0); + let expr1_0 = C::emit(ctx, pattern3_0); + let expr2_0 = C::emit(ctx, pattern3_1); return Some(pattern3_2); } + &ConsumesFlags::ConsumesFlagsFourTimesReturnsValueRegs { + inst1: ref pattern3_0, + inst2: ref pattern3_1, + inst3: ref pattern3_2, + inst4: ref pattern3_3, + result: pattern3_4, + } => { + // Rule at src/prelude.isle line 491. 
+ let expr0_0 = C::emit(ctx, pattern1_0); + let expr1_0 = C::emit(ctx, pattern3_0); + let expr2_0 = C::emit(ctx, pattern3_1); + let expr3_0 = C::emit(ctx, pattern3_2); + let expr4_0 = C::emit(ctx, pattern3_3); + return Some(pattern3_4); + } _ => {} } } @@ -1154,7 +1231,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 427. + // Rule at src/prelude.isle line 467. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -1174,7 +1251,7 @@ pub fn constructor_with_flags_reg( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/prelude.isle line 452. + // Rule at src/prelude.isle line 508. let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); diff --git a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest index 4ae1e14569..b7a8bb8367 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 9ea75a6f790b5c03 -src/prelude.isle 9830498351ddf6a3 +src/prelude.isle 6b0160bfcac86902 src/isa/s390x/inst.isle d91a16074ab186a8 src/isa/s390x/lower.isle 1cc5a12adc8c75f9 diff --git a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs index e9e94db681..844ff61c94 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs @@ -39,8 +39,14 @@ pub trait Context { fn u8_as_u64(&mut self, arg0: u8) -> u64; fn u16_as_u64(&mut self, arg0: u16) -> u64; fn u32_as_u64(&mut self, arg0: u32) -> u64; + fn i64_as_u64(&mut self, arg0: i64) -> u64; + fn u64_add(&mut self, arg0: u64, arg1: 
u64) -> u64; + fn u64_sub(&mut self, arg0: u64, arg1: u64) -> u64; + fn u64_and(&mut self, arg0: u64, arg1: u64) -> u64; fn ty_bits(&mut self, arg0: Type) -> u8; fn ty_bits_u16(&mut self, arg0: Type) -> u16; + fn ty_bits_u64(&mut self, arg0: Type) -> u64; + fn ty_mask(&mut self, arg0: Type) -> u64; fn ty_bytes(&mut self, arg0: Type) -> u16; fn lane_type(&mut self, arg0: Type) -> Type; fn fits_in_16(&mut self, arg0: Type) -> Option; @@ -144,13 +150,13 @@ pub trait Context { fn same_reg(&mut self, arg0: Reg, arg1: WritableReg) -> Option<()>; } -/// Internal type SideEffectNoResult: defined at src/prelude.isle line 363. +/// Internal type SideEffectNoResult: defined at src/prelude.isle line 385. #[derive(Clone, Debug)] pub enum SideEffectNoResult { Inst { inst: MInst }, } -/// Internal type ProducesFlags: defined at src/prelude.isle line 385. +/// Internal type ProducesFlags: defined at src/prelude.isle line 407. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlagsSideEffect { inst: MInst }, @@ -158,7 +164,7 @@ pub enum ProducesFlags { ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 396. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 418. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlagsReturnsResultWithProducer { @@ -174,6 +180,13 @@ pub enum ConsumesFlags { inst2: MInst, result: ValueRegs, }, + ConsumesFlagsFourTimesReturnsValueRegs { + inst1: MInst, + inst2: MInst, + inst3: MInst, + inst4: MInst, + result: ValueRegs, + }, } /// Internal type MInst: defined at src/isa/s390x/inst.isle line 2. @@ -941,7 +954,7 @@ pub fn constructor_side_effect( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 368. + // Rule at src/prelude.isle line 390. 
let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -959,7 +972,7 @@ pub fn constructor_safepoint( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 374. + // Rule at src/prelude.isle line 396. let expr0_0 = C::emit_safepoint(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -967,6 +980,55 @@ pub fn constructor_safepoint( return None; } +// Generated as internal constructor for term produces_flags_get_reg. +pub fn constructor_produces_flags_get_reg( + ctx: &mut C, + arg0: &ProducesFlags, +) -> Option { + let pattern0_0 = arg0; + if let &ProducesFlags::ProducesFlagsReturnsReg { + inst: ref pattern1_0, + result: pattern1_1, + } = pattern0_0 + { + // Rule at src/prelude.isle line 434. + return Some(pattern1_1); + } + return None; +} + +// Generated as internal constructor for term produces_flags_ignore. +pub fn constructor_produces_flags_ignore( + ctx: &mut C, + arg0: &ProducesFlags, +) -> Option { + let pattern0_0 = arg0; + match pattern0_0 { + &ProducesFlags::ProducesFlagsReturnsReg { + inst: ref pattern1_0, + result: pattern1_1, + } => { + // Rule at src/prelude.isle line 439. + let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { + inst: pattern1_0.clone(), + }; + return Some(expr0_0); + } + &ProducesFlags::ProducesFlagsReturnsResultWithConsumer { + inst: ref pattern1_0, + result: pattern1_1, + } => { + // Rule at src/prelude.isle line 441. + let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { + inst: pattern1_0.clone(), + }; + return Some(expr0_0); + } + _ => {} + } + return None; +} + // Generated as internal constructor for term consumes_flags_concat. pub fn constructor_consumes_flags_concat( ctx: &mut C, @@ -985,7 +1047,7 @@ pub fn constructor_consumes_flags_concat( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 408. + // Rule at src/prelude.isle line 448. 
let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { inst1: pattern1_0.clone(), @@ -1015,7 +1077,7 @@ pub fn constructor_with_flags( inst: ref pattern3_0, result: pattern3_1, } => { - // Rule at src/prelude.isle line 433. + // Rule at src/prelude.isle line 473. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_reg(ctx, pattern3_1); @@ -1026,12 +1088,27 @@ pub fn constructor_with_flags( inst2: ref pattern3_1, result: pattern3_2, } => { - // Rule at src/prelude.isle line 439. + // Rule at src/prelude.isle line 479. let expr0_0 = C::emit(ctx, pattern1_0); - let expr1_0 = C::emit(ctx, pattern3_1); - let expr2_0 = C::emit(ctx, pattern3_0); + let expr1_0 = C::emit(ctx, pattern3_0); + let expr2_0 = C::emit(ctx, pattern3_1); return Some(pattern3_2); } + &ConsumesFlags::ConsumesFlagsFourTimesReturnsValueRegs { + inst1: ref pattern3_0, + inst2: ref pattern3_1, + inst3: ref pattern3_2, + inst4: ref pattern3_3, + result: pattern3_4, + } => { + // Rule at src/prelude.isle line 491. + let expr0_0 = C::emit(ctx, pattern1_0); + let expr1_0 = C::emit(ctx, pattern3_0); + let expr2_0 = C::emit(ctx, pattern3_1); + let expr3_0 = C::emit(ctx, pattern3_2); + let expr4_0 = C::emit(ctx, pattern3_3); + return Some(pattern3_4); + } _ => {} } } @@ -1045,7 +1122,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 427. + // Rule at src/prelude.isle line 467. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -1065,7 +1142,7 @@ pub fn constructor_with_flags_reg( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/prelude.isle line 452. + // Rule at src/prelude.isle line 508. 
let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index ac85666147..93676d7e2b 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -159,15 +159,6 @@ (alternative Gpr) (dst WritableGpr)) - ;; GPR conditional move with the `OR` of two conditions; overwrites - ;; the destination register. - (CmoveOr (size OperandSize) - (cc1 CC) - (cc2 CC) - (consequent GprMem) - (alternative Gpr) - (dst WritableGpr)) - ;; XMM conditional move; overwrites the destination register. (XmmCmove (size OperandSize) (cc CC) @@ -175,15 +166,6 @@ (alternative Xmm) (dst WritableXmm)) - ;; XMM conditional move with the `OR` of two conditions; overwrites - ;; the destination register. - (XmmCmoveOr (size OperandSize) - (cc1 CC) - (cc2 CC) - (consequent XmmMem) - (alternative Xmm) - (dst WritableXmm)) - ;; ========================================= ;; Stack manipulation. @@ -1074,6 +1056,18 @@ (decl avx512f_enabled () Type) (extern extractor avx512f_enabled avx512f_enabled) +(decl avx512bitalg_enabled () Type) +(extern extractor avx512bitalg_enabled avx512bitalg_enabled) + +(decl use_lzcnt () Type) +(extern extractor use_lzcnt use_lzcnt) + +(decl use_bmi1 () Type) +(extern extractor use_bmi1 use_bmi1) + +(decl use_popcnt () Type) +(extern extractor use_popcnt use_popcnt) + ;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;; ;; Extract a constant `Imm8Reg.Imm8` from a value operand. @@ -1266,6 +1260,13 @@ (xmm_unary_rm_r (SseOpcode.Movdqu) addr)) +;; Load a constant into an XMM register. 
+(decl xmm_load_const (Type VCodeConstant) Xmm) +(rule (xmm_load_const ty const) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmLoadConst const dst ty)))) + dst)) + ;;;; Instruction Constructors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; These constructors create SSA-style `MInst`s. It is their responsibility to @@ -1398,6 +1399,13 @@ (imm $I64 bits) (OperandSize.Size64))) +;; Helper for emitting immediates with an `i64` value. Note that +;; integer constants in ISLE are always parsed as `i64`s; this enables +;; negative numbers to be used as immediates. +(decl imm_i64 (Type i64) Reg) +(rule (imm_i64 ty value) + (imm ty (i64_as_u64 value))) + (decl nonzero_u64_fits_in_u32 (u64) u64) (extern extractor nonzero_u64_fits_in_u32 nonzero_u64_fits_in_u32) @@ -1504,6 +1512,11 @@ (rule (cmp size src1 src2) (cmp_rmi_r size (CmpOpcode.Cmp) src1 src2)) +;; Helper for creating `cmp` instructions with an immediate. +(decl cmp_imm (OperandSize u32 Gpr) ProducesFlags) +(rule (cmp_imm size src1 src2) + (cmp_rmi_r size (CmpOpcode.Cmp) (RegMemImm.Imm src1) src2)) + ;; Helper for creating `MInst.XmmCmpRmR` instructions. 
(decl xmm_cmp_rm_r (SseOpcode XmmMem Xmm) ProducesFlags) (rule (xmm_cmp_rm_r opcode src1 src2) @@ -1579,17 +1592,25 @@ (decl cmove_or (Type CC CC GprMem Gpr) ConsumesFlags) (rule (cmove_or ty cc1 cc2 consequent alternative) (let ((dst WritableGpr (temp_writable_gpr)) - (size OperandSize (operand_size_of_type_32_64 ty))) - (ConsumesFlags.ConsumesFlagsReturnsReg - (MInst.CmoveOr size cc1 cc2 consequent alternative dst) + (tmp WritableGpr (temp_writable_gpr)) + (size OperandSize (operand_size_of_type_32_64 ty)) + (cmove1 MInst (MInst.Cmove size cc1 consequent alternative tmp)) + (cmove2 MInst (MInst.Cmove size cc2 consequent tmp dst))) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + cmove1 + cmove2 dst))) (decl cmove_or_xmm (Type CC CC XmmMem Xmm) ConsumesFlags) (rule (cmove_or_xmm ty cc1 cc2 consequent alternative) (let ((dst WritableXmm (temp_writable_xmm)) - (size OperandSize (operand_size_of_type_32_64 ty))) - (ConsumesFlags.ConsumesFlagsReturnsReg - (MInst.XmmCmoveOr size cc1 cc2 consequent alternative dst) + (tmp WritableXmm (temp_writable_xmm)) + (size OperandSize (operand_size_of_type_32_64 ty)) + (cmove1 MInst (MInst.XmmCmove size cc1 consequent alternative tmp)) + (cmove2 MInst (MInst.XmmCmove size cc2 consequent tmp dst))) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs + cmove1 + cmove2 dst))) ;; Helper for creating `cmove_or` instructions directly from values. 
This allows @@ -1601,12 +1622,18 @@ (alt ValueRegs alternative) (dst1 WritableGpr (temp_writable_gpr)) (dst2 WritableGpr (temp_writable_gpr)) + (tmp1 WritableGpr (temp_writable_gpr)) + (tmp2 WritableGpr (temp_writable_gpr)) (size OperandSize (OperandSize.Size64)) - (lower_cmove MInst (MInst.CmoveOr size cc1 cc2 (value_regs_get_gpr cons 0) (value_regs_get_gpr alt 0) dst1)) - (upper_cmove MInst (MInst.CmoveOr size cc1 cc2 (value_regs_get_gpr cons 1) (value_regs_get_gpr alt 1) dst2))) - (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs - lower_cmove - upper_cmove + (cmove1 MInst (MInst.Cmove size cc1 (value_regs_get_gpr cons 0) (value_regs_get_gpr alt 0) tmp1)) + (cmove2 MInst (MInst.Cmove size cc2 (value_regs_get_gpr cons 0) tmp1 dst1)) + (cmove3 MInst (MInst.Cmove size cc1 (value_regs_get_gpr cons 1) (value_regs_get_gpr alt 1) tmp2)) + (cmove4 MInst (MInst.Cmove size cc2 (value_regs_get_gpr cons 1) tmp2 dst2))) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs + cmove1 + cmove2 + cmove3 + cmove4 (value_regs dst1 dst2)))) (rule (cmove_or_from_values (is_gpr_type (is_single_register_type ty)) cc1 cc2 consequent alternative) @@ -1615,6 +1642,14 @@ (rule (cmove_or_from_values (is_xmm_type (is_single_register_type ty)) cc1 cc2 consequent alternative) (cmove_or_xmm ty cc1 cc2 consequent alternative)) +;; Helper for creating `MInst.Setcc` instructions. +(decl setcc (CC) ConsumesFlags) +(rule (setcc cc) + (let ((dst WritableGpr (temp_writable_gpr))) + (ConsumesFlags.ConsumesFlagsReturnsReg + (MInst.Setcc cc dst) + dst))) + ;; Helper for creating `MInst.MovzxRmR` instructions. (decl movzx (Type ExtMode GprMem) Gpr) (rule (movzx ty mode src) @@ -2027,6 +2062,16 @@ size)))) dst)) +;; Helper for creating `pshufb` instructions. +(decl pshufb (Xmm XmmMem) Xmm) +(rule (pshufb src1 src2) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmRmR (SseOpcode.Pshufb) + src1 + src2 + dst)))) + dst)) + ;; Helper for creating `MInst.XmmUnaryRmR` instructions.
(decl xmm_unary_rm_r (SseOpcode XmmMem) Xmm) (rule (xmm_unary_rm_r op src) @@ -2071,6 +2116,11 @@ (rule (vpabsq src) (xmm_unary_rm_r_evex (Avx512Opcode.Vpabsq) src)) +;; Helper for creating `vpopcntb` instructions. +(decl vpopcntb (XmmMem) Xmm) +(rule (vpopcntb src) + (xmm_unary_rm_r_evex (Avx512Opcode.Vpopcntb) src)) + ;; Helper for creating `MInst.XmmRmREvex` instructions. (decl xmm_rm_r_evex (Avx512Opcode XmmMem Xmm) Xmm) (rule (xmm_rm_r_evex op src1 src2) @@ -2221,6 +2271,70 @@ (rule (ud2 code) (SideEffectNoResult.Inst (MInst.Ud2 code))) +;; Helper for creating `lzcnt` instructions. +(decl lzcnt (Type Gpr) Gpr) +(rule (lzcnt ty src) + (let ((dst WritableGpr (temp_writable_gpr)) + (size OperandSize (operand_size_of_type_32_64 ty)) + (_ Unit (emit (MInst.UnaryRmR size (UnaryRmROpcode.Lzcnt) src dst)))) + dst)) + +;; Helper for creating `tzcnt` instructions. +(decl tzcnt (Type Gpr) Gpr) +(rule (tzcnt ty src) + (let ((dst WritableGpr (temp_writable_gpr)) + (size OperandSize (operand_size_of_type_32_64 ty)) + (_ Unit (emit (MInst.UnaryRmR size (UnaryRmROpcode.Tzcnt) src dst)))) + dst)) + +;; Helper for creating `bsr` instructions. +(decl bsr (Type Gpr) ProducesFlags) +(rule (bsr ty src) + (let ((dst WritableGpr (temp_writable_gpr)) + (size OperandSize (operand_size_of_type_32_64 ty)) + (inst MInst (MInst.UnaryRmR size (UnaryRmROpcode.Bsr) src dst))) + (ProducesFlags.ProducesFlagsReturnsReg inst dst))) + +;; Helper for creating `bsr + cmov` instruction pairs that produce the +;; result of the `bsr`, or `alt` if the input was zero. +(decl bsr_or_else (Type Gpr Gpr) Gpr) +(rule (bsr_or_else ty src alt) + (let ((bsr ProducesFlags (bsr ty src)) + ;; Manually extract the result from the bsr, then ignore + ;; it below, since we need to thread it into the cmove + ;; before we pass the cmove to with_flags_reg. 
+ (bsr_result Gpr (produces_flags_get_reg bsr)) + (cmove ConsumesFlags (cmove ty (CC.Z) alt bsr_result))) + (with_flags_reg (produces_flags_ignore bsr) cmove))) + +;; Helper for creating `bsf` instructions. +(decl bsf (Type Gpr) ProducesFlags) +(rule (bsf ty src) + (let ((dst WritableGpr (temp_writable_gpr)) + (size OperandSize (operand_size_of_type_32_64 ty)) + (inst MInst (MInst.UnaryRmR size (UnaryRmROpcode.Bsf) src dst))) + (ProducesFlags.ProducesFlagsReturnsReg inst dst))) + +;; Helper for creating `bsf + cmov` instruction pairs that produce the +;; result of the `bsf`, or `alt` if the input was zero. +(decl bsf_or_else (Type Gpr Gpr) Gpr) +(rule (bsf_or_else ty src alt) + (let ((bsf ProducesFlags (bsf ty src)) + ;; Manually extract the result from the bsf, then ignore + ;; it below, since we need to thread it into the cmove + ;; before we pass the cmove to with_flags_reg. + (bsf_result Gpr (produces_flags_get_reg bsf)) + (cmove ConsumesFlags (cmove ty (CC.Z) alt bsf_result))) + (with_flags_reg (produces_flags_ignore bsf) cmove))) + +;; Helper for creating `popcnt` instructions. 
+(decl x64_popcnt (Type Gpr) Gpr) +(rule (x64_popcnt ty src) + (let ((dst WritableGpr (temp_writable_gpr)) + (size OperandSize (operand_size_of_type_32_64 ty)) + (_ Unit (emit (MInst.UnaryRmR size (UnaryRmROpcode.Popcnt) src dst)))) + dst)) + ;;;; Automatic conversions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (convert Gpr InstOutput output_gpr) @@ -2241,6 +2355,8 @@ (convert Reg GprMemImm reg_to_gpr_mem_imm) (convert WritableGpr WritableReg writable_gpr_to_reg) (convert WritableGpr Reg writable_gpr_to_r_reg) +(convert WritableGpr GprMem writable_gpr_to_gpr_mem) +(convert WritableGpr ValueRegs writable_gpr_to_value_regs) (convert Xmm InstOutput output_xmm) (convert Value Xmm put_in_xmm) @@ -2259,8 +2375,10 @@ (convert WritableXmm WritableReg writable_xmm_to_reg) (convert WritableXmm Reg writable_xmm_to_r_reg) (convert WritableXmm XmmMem writable_xmm_to_xmm_mem) +(convert WritableXmm ValueRegs writable_xmm_to_value_regs) (convert Gpr Imm8Gpr gpr_to_imm8_gpr) +(convert Imm8Reg Imm8Gpr imm8_reg_to_imm8_gpr) (convert Amode SyntheticAmode amode_to_synthetic_amode) (convert SyntheticAmode GprMem synthetic_amode_to_gpr_mem) @@ -2276,12 +2394,21 @@ (decl writable_gpr_to_r_reg (WritableGpr) Reg) (rule (writable_gpr_to_r_reg w_gpr) (writable_reg_to_reg (writable_gpr_to_reg w_gpr))) +(decl writable_gpr_to_gpr_mem (WritableGpr) GprMem) +(rule (writable_gpr_to_gpr_mem w_gpr) + (gpr_to_gpr_mem w_gpr)) +(decl writable_gpr_to_value_regs (WritableGpr) ValueRegs) +(rule (writable_gpr_to_value_regs w_gpr) + (value_reg w_gpr)) (decl writable_xmm_to_r_reg (WritableXmm) Reg) (rule (writable_xmm_to_r_reg w_xmm) (writable_reg_to_reg (writable_xmm_to_reg w_xmm))) (decl writable_xmm_to_xmm_mem (WritableXmm) XmmMem) (rule (writable_xmm_to_xmm_mem w_xmm) (xmm_to_xmm_mem (writable_xmm_to_xmm w_xmm))) +(decl writable_xmm_to_value_regs (WritableXmm) ValueRegs) +(rule (writable_xmm_to_value_regs w_xmm) + (value_reg w_xmm)) (decl synthetic_amode_to_gpr_mem (SyntheticAmode) GprMem) 
(rule (synthetic_amode_to_gpr_mem amode) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 4bd8fb5d70..7177109d36 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1104,33 +1104,6 @@ pub(crate) fn emit( } } - Inst::CmoveOr { - size, - cc1, - cc2, - consequent, - alternative, - dst, - } => { - let first_cmove = Inst::Cmove { - cc: *cc1, - size: *size, - consequent: consequent.clone(), - alternative: alternative.clone(), - dst: dst.clone(), - }; - first_cmove.emit(sink, info, state); - - let second_cmove = Inst::Cmove { - cc: *cc2, - size: *size, - consequent: consequent.clone(), - alternative: alternative.clone(), - dst: dst.clone(), - }; - second_cmove.emit(sink, info, state); - } - Inst::XmmCmove { size, cc, @@ -1159,39 +1132,6 @@ pub(crate) fn emit( sink.bind_label(next); } - Inst::XmmCmoveOr { - size, - cc1, - cc2, - consequent, - alternative, - dst, - } => { - debug_assert_eq!(*alternative, dst.to_reg()); - - let op = if *size == OperandSize::Size64 { - SseOpcode::Movsd - } else { - SseOpcode::Movss - }; - let second_test = sink.get_label(); - let next_instruction = sink.get_label(); - - // Jump to second test if `cc1` is *not* set. - one_way_jmp(sink, cc1.invert(), next_instruction); - let inst = - Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg()); - inst.emit(sink, info, state); - sink.bind_label(second_test); - - // Jump to next instruction if `cc2` is *not* set. 
- one_way_jmp(sink, cc2.invert(), next_instruction); - let inst = - Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg()); - inst.emit(sink, info, state); - sink.bind_label(next_instruction); - } - Inst::Push64 { src } => { if info.flags.enable_probestack() { sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 6532f23273..3e4ada4553 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -52,7 +52,6 @@ impl Inst { | Inst::CallUnknown { .. } | Inst::CheckedDivOrRemSeq { .. } | Inst::Cmove { .. } - | Inst::CmoveOr { .. } | Inst::CmpRmiR { .. } | Inst::CvtFloatToSintSeq { .. } | Inst::CvtFloatToUintSeq { .. } @@ -89,7 +88,6 @@ impl Inst { | Inst::Ud2 { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::XmmCmove { .. } - | Inst::XmmCmoveOr { .. } | Inst::XmmCmpRmR { .. } | Inst::XmmLoadConst { .. } | Inst::XmmMinMaxSeq { .. } @@ -141,6 +139,7 @@ impl Inst { } } + #[allow(dead_code)] pub(crate) fn unary_rm_r( size: OperandSize, op: UnaryRmROpcode, @@ -906,12 +905,6 @@ impl Inst { alternative, dst, .. - } - | Inst::CmoveOr { - size, - alternative, - dst, - .. } => { if *alternative != dst.to_reg() { debug_assert!(alternative.is_virtual()); @@ -926,9 +919,6 @@ impl Inst { } Inst::XmmCmove { alternative, dst, .. - } - | Inst::XmmCmoveOr { - alternative, dst, .. 
} => { if *alternative != dst.to_reg() { debug_assert!(alternative.is_virtual()); @@ -1619,27 +1609,6 @@ impl PrettyPrint for Inst { show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) ), - Inst::CmoveOr { - size, - cc1, - cc2, - consequent: src, - alternative: _, - dst, - } => { - let src = src.show_rru_sized(mb_rru, size.to_bytes()); - let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()); - format!( - "{} {}, {}; {} {}, {}", - ljustify(format!("cmov{}{}", cc1.to_string(), suffix_bwlq(*size))), - src, - dst, - ljustify(format!("cmov{}{}", cc2.to_string(), suffix_bwlq(*size))), - src, - dst, - ) - } - Inst::XmmCmove { size, cc, @@ -1660,34 +1629,6 @@ impl PrettyPrint for Inst { ) } - Inst::XmmCmoveOr { - size, - cc1, - cc2, - consequent: src, - dst, - .. - } => { - let suffix = if *size == OperandSize::Size64 { - "sd" - } else { - "ss" - }; - let src = src.show_rru_sized(mb_rru, size.to_bytes()); - let dst = show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()); - format!( - "j{} $check; mov{} {}, {}; $check: j{} $next; mov{} {}, {}; $next", - cc1.invert().to_string(), - suffix, - src, - dst, - cc2.invert().to_string(), - suffix, - src, - dst, - ) - } - Inst::Push64 { src } => { format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru)) } @@ -2086,11 +2027,6 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { consequent: src, dst, .. - } - | Inst::CmoveOr { - consequent: src, - dst, - .. } => { src.get_regs_as_uses(collector); collector.add_mod(dst.to_writable_reg()); @@ -2099,11 +2035,6 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { consequent: src, dst, .. - } - | Inst::XmmCmoveOr { - consequent: src, - dst, - .. } => { src.get_regs_as_uses(collector); collector.add_mod(dst.to_writable_reg()); @@ -2554,12 +2485,6 @@ pub(crate) fn x64_map_regs(inst: &mut Inst, mapper: &RM) { ref mut dst, ref mut alternative, .. 
- } - | Inst::CmoveOr { - consequent: ref mut src, - ref mut dst, - ref mut alternative, - .. } => { src.map_uses(mapper); dst.map_mod(mapper); @@ -2570,12 +2495,6 @@ pub(crate) fn x64_map_regs(inst: &mut Inst, mapper: &RM) { ref mut dst, ref mut alternative, .. - } - | Inst::XmmCmoveOr { - consequent: ref mut src, - ref mut dst, - ref mut alternative, - .. } => { src.map_uses(mapper); dst.map_mod(mapper); diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 6efe439484..bc8b25db0c 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1467,22 +1467,22 @@ ;; - `CC.BE -> C = 1 OR Z = 1` (below or equal) ;; - `CC.NBE -> C = 0 AND Z = 0` (not below or equal) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Ordered) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.Ordered) a b) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.NP) x y))) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Unordered) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.Unordered) a b) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.P) x y))) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThan) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.GreaterThan) a b) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.NBE) x y))) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.GreaterThanOrEqual) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.GreaterThanOrEqual) a b) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.NB) x y))) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThan) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrLessThan) a b) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.B) x y))) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b)) x y))) +(rule 
(lower (has_type ty (select (fcmp (FloatCC.UnorderedOrLessThanOrEqual) a b) x y))) (with_flags (fpcmp b a) (cmove_from_values ty (CC.BE) x y))) ;; Certain FloatCC variants are implemented by flipping the operands of the @@ -1496,16 +1496,16 @@ ;; not `LT | UNO`. By flipping the operands AND inverting the comparison (e.g., ;; to `CC.NBE`), we also avoid these unordered cases. -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThan) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.LessThan) a b) x y))) (with_flags (fpcmp a b) (cmove_from_values ty (CC.NBE) x y))) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.LessThanOrEqual) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.LessThanOrEqual) a b) x y))) (with_flags (fpcmp a b) (cmove_from_values ty (CC.NB) x y))) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThan) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrGreaterThan) a b) x y))) (with_flags (fpcmp a b) (cmove_from_values ty (CC.B) x y))) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) a b) x y))) (with_flags (fpcmp a b) (cmove_from_values ty (CC.BE) x y))) ;; `FloatCC.Equal` and `FloatCC.NotEqual` can only be implemented with multiple @@ -1521,8 +1521,341 @@ ;; More details about the CLIF semantics for `fcmp` are available at ;; https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html#method.fcmp. 
-(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.Equal) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.Equal) a b) x y))) (with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) y x))) -(rule (lower (has_type ty (select (def_inst (fcmp (FloatCC.NotEqual) a b)) x y))) +(rule (lower (has_type ty (select (fcmp (FloatCC.NotEqual) a b) x y))) (with_flags (fpcmp a b) (cmove_or_from_values ty (CC.NZ) (CC.P) x y))) + +;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; If available, we can use a plain lzcnt instruction here. Note no +;; special handling is required for zero inputs, because the machine +;; instruction does what the CLIF expects for zero, i.e. it returns +;; the operand width in bits. +(rule 1 (lower + (has_type (and + (ty_32_or_64 ty) + (use_lzcnt)) + (clz src))) + (lzcnt ty src)) + +(rule (lower + (has_type (ty_32_or_64 ty) + (clz src))) + (do_clz ty ty src)) + +(rule (lower + (has_type (ty_8_or_16 ty) + (clz src))) + (do_clz $I32 ty (extend_to_gpr src $I32 (ExtendKind.Zero)))) + +(rule (lower + (has_type $I128 + (clz src))) + (let ((upper Gpr (do_clz $I64 $I64 (value_regs_get_gpr src 1))) + (lower Gpr (add $I64 + (do_clz $I64 $I64 (value_regs_get_gpr src 0)) + (RegMemImm.Imm 64))) + (result_lo Gpr + (with_flags_reg + (cmp_imm (OperandSize.Size64) 64 upper) + (cmove $I64 (CC.NZ) upper lower)))) + (value_regs result_lo (imm $I64 0)))) + +;; Implementation helper for clz; operates on 32 or 64-bit units. +(decl do_clz (Type Type Gpr) Gpr) +(rule (do_clz ty orig_ty src) + (let ((highest_bit_index Reg (bsr_or_else ty src (imm_i64 $I64 -1))) + (bits_minus_1 Reg (imm ty (u64_sub (ty_bits_u64 orig_ty) 1)))) + (sub ty bits_minus_1 highest_bit_index))) + +;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Analogous to `clz` cases above, but using mirror instructions +;; (tzcnt vs lzcnt, bsf vs bsr).
+ +(rule 1 (lower + (has_type (and + (ty_32_or_64 ty) + (use_bmi1)) + (ctz src))) + (tzcnt ty src)) + +(rule (lower + (has_type (ty_32_or_64 ty) + (ctz src))) + (do_ctz ty ty src)) + +(rule (lower + (has_type (ty_8_or_16 ty) + (ctz src))) + (do_ctz $I32 ty (extend_to_gpr src $I32 (ExtendKind.Zero)))) + +(rule (lower + (has_type $I128 + (ctz src))) + (let ((lower Gpr (do_ctz $I64 $I64 (value_regs_get_gpr src 0))) + (upper Gpr (add $I64 + (do_ctz $I64 $I64 (value_regs_get_gpr src 1)) + (RegMemImm.Imm 64))) + (result_lo Gpr + (with_flags_reg + (cmp_imm (OperandSize.Size64) 64 lower) + (cmove $I64 (CC.Z) upper lower)))) + (value_regs result_lo (imm $I64 0)))) + +(decl do_ctz (Type Type Gpr) Gpr) +(rule (do_ctz ty orig_ty src) + (bsf_or_else ty src (imm $I64 (ty_bits_u64 orig_ty)))) + +;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower + (has_type (and + (ty_32_or_64 ty) + (use_popcnt)) + (popcnt src))) + (x64_popcnt ty src)) + +(rule 1 (lower + (has_type (and + (ty_8_or_16 ty) + (use_popcnt)) + (popcnt src))) + (x64_popcnt $I32 (extend_to_gpr src $I32 (ExtendKind.Zero)))) + +(rule 1 (lower + (has_type (and + $I128 + (use_popcnt)) + (popcnt src))) + (let ((lo_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 0))) + (hi_count Gpr (x64_popcnt $I64 (value_regs_get_gpr src 1)))) + (value_regs (add $I64 lo_count hi_count) (imm $I64 0)))) + +(rule (lower + (has_type (ty_32_or_64 ty) + (popcnt src))) + (do_popcnt ty src)) + +(rule (lower + (has_type (ty_8_or_16 ty) + (popcnt src))) + (do_popcnt $I32 (extend_to_gpr src $I32 (ExtendKind.Zero)))) + +(rule (lower + (has_type $I128 + (popcnt src))) + (let ((lo_count Gpr (do_popcnt $I64 (value_regs_get_gpr src 0))) + (hi_count Gpr (do_popcnt $I64 (value_regs_get_gpr src 1)))) + (value_regs (add $I64 lo_count hi_count) (imm $I64 0)))) + +;; Implementation of popcount when we don't have a native popcount +;; instruction.
+(decl do_popcnt (Type Gpr) Gpr) +(rule (do_popcnt $I64 src) + (let ((shifted1 Gpr (shr $I64 src (Imm8Reg.Imm8 1))) + (sevens Gpr (imm $I64 0x7777777777777777)) + (masked1 Gpr (x64_and $I64 shifted1 sevens)) + ;; diff1 := src - ((src >> 1) & 0b0111_0111_0111...) + (diff1 Gpr (sub $I64 src masked1)) + (shifted2 Gpr (shr $I64 masked1 (Imm8Reg.Imm8 1))) + (masked2 Gpr (x64_and $I64 shifted2 sevens)) + ;; diff2 := diff1 - ((diff1 >> 1) & 0b0111_0111_0111...) + (diff2 Gpr (sub $I64 diff1 masked2)) + (shifted3 Gpr (shr $I64 masked2 (Imm8Reg.Imm8 1))) + (masked3 Gpr (x64_and $I64 shifted3 sevens)) + ;; diff3 := diff2 - ((diff2 >> 1) & 0b0111_0111_0111...) + ;; + ;; At this point, each nibble of diff3 is the popcount of + ;; that nibble. This works because at each step above, we + ;; are basically subtracting floor(value / 2) from the + ;; running value; the leftover remainder is 1 if the LSB + ;; was 1. After three steps, we have (nibble / 8) -- 0 or + ;; 1 for the MSB of the nibble -- plus three possible + ;; additions for the three other bits. + (diff3 Gpr (sub $I64 diff2 masked3)) + ;; Add the two nibbles of each byte together. + (sum1 Gpr (add $I64 + (shr $I64 diff3 (Imm8Reg.Imm8 4)) + diff3)) + ;; Mask the above sum to have the popcount for each byte + ;; in the lower nibble of that byte. + (ofof Gpr (imm $I64 0x0f0f0f0f0f0f0f0f)) + (masked4 Gpr (x64_and $I64 sum1 ofof)) + (ones Gpr (imm $I64 0x0101010101010101)) + ;; Use a multiply to sum all of the bytes' popcounts into + ;; the top byte. Consider the binomial expansion for the + ;; top byte: it is the sum of the bytes (masked4 >> 56) * + ;; 0x01 + (masked4 >> 48) * 0x01 + (masked4 >> 40) * 0x01 + ;; + ... + (masked4 >> 0). + (mul Gpr (mul $I64 masked4 ones)) + ;; Now take that top byte and return it as the popcount. + (final Gpr (shr $I64 mul (Imm8Reg.Imm8 56)))) + final)) + +;; This is the 32-bit version of the above; the steps for each nibble +;; are the same, we just use constants half as wide. 
+(rule (do_popcnt $I32 src) + (let ((shifted1 Gpr (shr $I32 src (Imm8Reg.Imm8 1))) + (sevens Gpr (imm $I32 0x77777777)) + (masked1 Gpr (x64_and $I32 shifted1 sevens)) + (diff1 Gpr (sub $I32 src masked1)) + (shifted2 Gpr (shr $I32 masked1 (Imm8Reg.Imm8 1))) + (masked2 Gpr (x64_and $I32 shifted2 sevens)) + (diff2 Gpr (sub $I32 diff1 masked2)) + (shifted3 Gpr (shr $I32 masked2 (Imm8Reg.Imm8 1))) + (masked3 Gpr (x64_and $I32 shifted3 sevens)) + (diff3 Gpr (sub $I32 diff2 masked3)) + (sum1 Gpr (add $I32 + (shr $I32 diff3 (Imm8Reg.Imm8 4)) + diff3)) + (masked4 Gpr (x64_and $I32 sum1 (RegMemImm.Imm 0x0f0f0f0f))) + (mul Gpr (mul $I32 masked4 (RegMemImm.Imm 0x01010101))) + (final Gpr (shr $I32 mul (Imm8Reg.Imm8 24)))) + final)) + + +(rule 1 (lower (has_type (and + $I8X16 + (avx512vl_enabled) + (avx512bitalg_enabled)) + (popcnt src))) + (vpopcntb src)) + + + +;; For SSE 4.2 we use Mula's algorithm (https://arxiv.org/pdf/1611.07612.pdf): +;; +;; __m128i count_bytes ( __m128i v) { +;; __m128i lookup = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); +;; __m128i low_mask = _mm_set1_epi8 (0x0f); +;; __m128i lo = _mm_and_si128 (v, low_mask); +;; __m128i hi = _mm_and_si128 (_mm_srli_epi16 (v, 4), low_mask); +;; __m128i cnt1 = _mm_shuffle_epi8 (lookup, lo); +;; __m128i cnt2 = _mm_shuffle_epi8 (lookup, hi); +;; return _mm_add_epi8 (cnt1, cnt2); +;; } +;; +;; Details of the above algorithm can be found in the reference noted above, but the basics +;; are to create a lookup table that pre populates the popcnt values for each number [0,15]. +;; The algorithm uses shifts to isolate 4 bit sections of the vector, pshufb as part of the +;; lookup process, and adds together the results. 
+;; +;; __m128i lookup = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); + +(decl popcount_4bit_table () VCodeConstant) ;; bits-per-nibble table `lookup` above +(extern constructor popcount_4bit_table popcount_4bit_table) + +(decl popcount_low_mask () VCodeConstant) ;; mask for low nibbles: 0x0f * 16 +(extern constructor popcount_low_mask popcount_low_mask) + +(rule (lower (has_type $I8X16 + (popcnt src))) + (let ((nibble_table_const VCodeConstant (popcount_4bit_table)) + (low_mask Xmm (xmm_load_const $I8X16 (popcount_low_mask))) + (low_nibbles Xmm (sse_and $I8X16 src low_mask)) + ;; Note that this is a 16x8 shift, but that's OK; we mask + ;; off anything that traverses from one byte to the next + ;; with the low_mask below. + (shifted_src Xmm (psrlw src (RegMemImm.Imm 4))) + (high_nibbles Xmm (sse_and $I8X16 shifted_src low_mask)) + (lookup Xmm (xmm_load_const $I8X16 (popcount_4bit_table))) + (bit_counts_low Xmm (pshufb lookup low_nibbles)) + (bit_counts_high Xmm (pshufb lookup high_nibbles))) + (paddb bit_counts_low bit_counts_high))) + +;; Rules for `bitrev` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I8 (bitrev src))) + (do_bitrev8 $I32 src)) + +(rule (lower (has_type $I16 (bitrev src))) + (do_bitrev16 $I32 src)) + +(rule (lower (has_type $I32 (bitrev src))) + (do_bitrev32 $I32 src)) + +(rule (lower (has_type $I64 (bitrev src))) + (do_bitrev64 $I64 src)) + +(rule (lower (has_type $I128 (bitrev src))) + (value_regs + (do_bitrev64 $I64 (value_regs_get_gpr src 1)) + (do_bitrev64 $I64 (value_regs_get_gpr src 0)))) + +(decl do_bitrev8 (Type Gpr) Gpr) +(rule (do_bitrev8 ty src) + (let ((tymask u64 (ty_mask ty)) + (mask1 Gpr (imm ty (u64_and tymask 0x5555555555555555))) + (lo1 Gpr (x64_and ty src mask1)) + (hi1 Gpr (x64_and ty (shr ty src (Imm8Reg.Imm8 1)) mask1)) + (swap1 Gpr (or ty + (shl ty lo1 (Imm8Reg.Imm8 1)) + hi1)) + (mask2 Gpr (imm ty (u64_and tymask 0x3333333333333333))) + (lo2 Gpr (x64_and ty swap1 
mask2)) + (hi2 Gpr (x64_and ty (shr ty swap1 (Imm8Reg.Imm8 2)) mask2)) + (swap2 Gpr (or ty + (shl ty lo2 (Imm8Reg.Imm8 2)) + hi2)) + (mask4 Gpr (imm ty (u64_and tymask 0x0f0f0f0f0f0f0f0f))) + (lo4 Gpr (x64_and ty swap2 mask4)) + (hi4 Gpr (x64_and ty (shr ty swap2 (Imm8Reg.Imm8 4)) mask4)) + (swap4 Gpr (or ty + (shl ty lo4 (Imm8Reg.Imm8 4)) + hi4))) + swap4)) + +(decl do_bitrev16 (Type Gpr) Gpr) +(rule (do_bitrev16 ty src) + (let ((src_ Gpr (do_bitrev8 ty src)) + (tymask u64 (ty_mask ty)) + (mask8 Gpr (imm ty (u64_and tymask 0x00ff00ff00ff00ff))) + (lo8 Gpr (x64_and ty src_ mask8)) + (hi8 Gpr (x64_and ty (shr ty src_ (Imm8Reg.Imm8 8)) mask8)) + (swap8 Gpr (or ty + (shl ty lo8 (Imm8Reg.Imm8 8)) + hi8))) + swap8)) + +(decl do_bitrev32 (Type Gpr) Gpr) +(rule (do_bitrev32 ty src) + (let ((src_ Gpr (do_bitrev16 ty src)) + (tymask u64 (ty_mask ty)) + (mask16 Gpr (imm ty (u64_and tymask 0x0000ffff0000ffff))) + (lo16 Gpr (x64_and ty src_ mask16)) + (hi16 Gpr (x64_and ty (shr ty src_ (Imm8Reg.Imm8 16)) mask16)) + (swap16 Gpr (or ty + (shl ty lo16 (Imm8Reg.Imm8 16)) + hi16))) + swap16)) + +(decl do_bitrev64 (Type Gpr) Gpr) +(rule (do_bitrev64 ty @ $I64 src) + (let ((src_ Gpr (do_bitrev32 ty src)) + (mask32 Gpr (imm ty 0xffffffff)) + (lo32 Gpr (x64_and ty src_ mask32)) + (hi32 Gpr (shr ty src_ (Imm8Reg.Imm8 32))) + (swap32 Gpr (or ty + (shl ty lo32 (Imm8Reg.Imm8 32)) + hi32))) + swap32)) + +;; Rules for `is_null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Null references are represented by the constant value `0`. +(rule (lower (is_null src @ (value_type $R64))) + (with_flags + (cmp_imm (OperandSize.Size64) 0 src) + (setcc (CC.Z)))) + +;; Rules for `is_invalid` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Invalid references are represented by the constant value `-1`. +(rule (lower (is_invalid src @ (value_type $R64))) + (with_flags + (cmp_imm (OperandSize.Size64) 0xffffffff src) ;; simm32 0xffff_ffff is sign-extended to -1.
+ (setcc (CC.Z)))) + diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index d5de080132..024556ee1f 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -21,7 +21,7 @@ use alloc::boxed::Box; use alloc::vec::Vec; use log::trace; use regalloc::{Reg, RegClass, Writable}; -use smallvec::{smallvec, SmallVec}; +use smallvec::SmallVec; use std::convert::TryFrom; use target_lexicon::Triple; @@ -615,269 +615,6 @@ fn emit_fcmp>( cond_result } -fn emit_bitrev>(ctx: &mut C, src: Reg, dst: Writable, ty: Type) { - let bits = ty.bits(); - let const_mask = if bits == 64 { - 0xffff_ffff_ffff_ffff - } else { - (1u64 << bits) - 1 - }; - let tmp0 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - - ctx.emit(Inst::gen_move(tmp0, src, types::I64)); - - // Swap 1-bit units. - // tmp1 = src - ctx.emit(Inst::gen_move(tmp1, tmp0.to_reg(), types::I64)); - // tmp2 = 0b0101.. - ctx.emit(Inst::imm( - OperandSize::Size64, - 0x5555_5555_5555_5555 & const_mask, - tmp2, - )); - // tmp1 = src >> 1 - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(1), - tmp1, - )); - // tmp1 = (src >> 1) & 0b0101.. - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp2.to_reg()), - tmp1, - )); - // tmp2 = src & 0b0101.. - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp0.to_reg()), - tmp2, - )); - // tmp2 = (src & 0b0101..) << 1 - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftLeft, - Some(1), - tmp2, - )); - // tmp0 = (src >> 1) & 0b0101.. | (src & 0b0101..) << 1 - ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(tmp1.to_reg()), - tmp0, - )); - - // Swap 2-bit units. 
- ctx.emit(Inst::gen_move(tmp1, tmp0.to_reg(), types::I64)); - ctx.emit(Inst::imm( - OperandSize::Size64, - 0x3333_3333_3333_3333 & const_mask, - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(2), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp2.to_reg()), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp0.to_reg()), - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftLeft, - Some(2), - tmp2, - )); - ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(tmp1.to_reg()), - tmp0, - )); - - // Swap 4-bit units. - ctx.emit(Inst::gen_move(tmp1, tmp0.to_reg(), types::I64)); - ctx.emit(Inst::imm( - OperandSize::Size64, - 0x0f0f_0f0f_0f0f_0f0f & const_mask, - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(4), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp2.to_reg()), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp0.to_reg()), - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftLeft, - Some(4), - tmp2, - )); - ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(tmp1.to_reg()), - tmp0, - )); - - if bits > 8 { - // Swap 8-bit units. 
- ctx.emit(Inst::gen_move(tmp1, tmp0.to_reg(), types::I64)); - ctx.emit(Inst::imm( - OperandSize::Size64, - 0x00ff_00ff_00ff_00ff & const_mask, - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(8), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp2.to_reg()), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp0.to_reg()), - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftLeft, - Some(8), - tmp2, - )); - ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(tmp1.to_reg()), - tmp0, - )); - } - - if bits > 16 { - // Swap 16-bit units. - ctx.emit(Inst::gen_move(tmp1, tmp0.to_reg(), types::I64)); - ctx.emit(Inst::imm( - OperandSize::Size64, - 0x0000_ffff_0000_ffff & const_mask, - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(16), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp2.to_reg()), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp0.to_reg()), - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftLeft, - Some(16), - tmp2, - )); - ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(tmp1.to_reg()), - tmp0, - )); - } - - if bits > 32 { - // Swap 32-bit units. 
- ctx.emit(Inst::gen_move(tmp1, tmp0.to_reg(), types::I64)); - ctx.emit(Inst::imm( - OperandSize::Size64, - 0x0000_0000_ffff_ffff & const_mask, - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(32), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp2.to_reg()), - tmp1, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(tmp0.to_reg()), - tmp2, - )); - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftLeft, - Some(32), - tmp2, - )); - ctx.emit(Inst::gen_move(tmp0, tmp2.to_reg(), types::I64)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(tmp1.to_reg()), - tmp0, - )); - } - - ctx.emit(Inst::gen_move(dst, tmp0.to_reg(), types::I64)); -} - fn make_libcall_sig>( ctx: &mut C, insn: IRInst, @@ -1102,75 +839,6 @@ fn emit_cmoves>( } } -fn emit_clz>( - ctx: &mut C, - orig_ty: Type, - ty: Type, - src: Reg, - dst: Writable, -) { - let src = RegMem::reg(src); - let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); - ctx.emit(Inst::imm(OperandSize::from_ty(ty), u64::max_value(), dst)); - - ctx.emit(Inst::unary_rm_r( - OperandSize::from_ty(ty), - UnaryRmROpcode::Bsr, - src, - tmp, - )); - - ctx.emit(Inst::cmove( - OperandSize::from_ty(ty), - CC::Z, - RegMem::reg(dst.to_reg()), - tmp, - )); - - ctx.emit(Inst::imm( - OperandSize::from_ty(ty), - orig_ty.bits() as u64 - 1, - dst, - )); - - ctx.emit(Inst::alu_rmi_r( - if ty == types::I64 { - OperandSize::Size64 - } else { - OperandSize::Size32 - }, - AluRmiROpcode::Sub, - RegMemImm::reg(tmp.to_reg()), - dst, - )); -} - -fn emit_ctz>( - ctx: &mut C, - orig_ty: Type, - ty: Type, - src: Reg, - dst: Writable, -) { - let src = RegMem::reg(src); - let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); - ctx.emit(Inst::imm(OperandSize::Size32, orig_ty.bits() as u64, tmp)); - - ctx.emit(Inst::unary_rm_r( - OperandSize::from_ty(ty), - 
UnaryRmROpcode::Bsf, - src, - dst, - )); - - ctx.emit(Inst::cmove( - OperandSize::from_ty(ty), - CC::Z, - RegMem::reg(tmp.to_reg()), - dst, - )); -} - //============================================================================= // Top-level instruction lowering entry point, for one instruction. @@ -1243,686 +911,13 @@ fn lower_insn_to_regs>( | Opcode::Rotr | Opcode::Ineg | Opcode::Trap - | Opcode::ResumableTrap => implemented_in_isle(ctx), - - Opcode::Clz => { - let orig_ty = ty.unwrap(); - - if isa_flags.use_lzcnt() && (orig_ty == types::I32 || orig_ty == types::I64) { - // We can use a plain lzcnt instruction here. Note no special handling is required - // for zero inputs, because the machine instruction does what the CLIF expects for - // zero, i.e. it returns zero. - let src = input_to_reg_mem(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::unary_rm_r( - OperandSize::from_ty(orig_ty), - UnaryRmROpcode::Lzcnt, - src, - dst, - )); - return Ok(()); - } - - // General formula using bit-scan reverse (BSR): - // mov -1, %dst - // bsr %src, %tmp - // cmovz %dst, %tmp - // mov $(size_bits - 1), %dst - // sub %tmp, %dst - - if orig_ty == types::I128 { - // clz upper, tmp1 - // clz lower, dst - // add dst, 64 - // cmp tmp1, 64 - // cmovnz tmp1, dst - let dsts = get_output_reg(ctx, outputs[0]); - let dst = dsts.regs()[0]; - let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let srcs = put_input_in_regs(ctx, inputs[0]); - let src_lo = srcs.regs()[0]; - let src_hi = srcs.regs()[1]; - emit_clz(ctx, types::I64, types::I64, src_hi, tmp1); - emit_clz(ctx, types::I64, types::I64, src_lo, dst); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Add, - RegMemImm::imm(64), - dst, - )); - ctx.emit(Inst::cmp_rmi_r( - OperandSize::Size64, - RegMemImm::imm(64), - tmp1.to_reg(), - )); - ctx.emit(Inst::cmove( - OperandSize::Size64, - CC::NZ, - RegMem::reg(tmp1.to_reg()), - dst, - )); - 
ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Xor, - RegMemImm::reg(dsts.regs()[1].to_reg()), - dsts.regs()[1], - )); - } else { - let (ext_spec, ty) = match orig_ty { - types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32), - a if a == types::I32 || a == types::I64 => (None, a), - _ => unreachable!(), - }; - let src = if let Some(ext_spec) = ext_spec { - extend_input_to_reg(ctx, inputs[0], ext_spec) - } else { - put_input_in_reg(ctx, inputs[0]) - }; - - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - emit_clz(ctx, orig_ty, ty, src, dst); - } - } - - Opcode::Ctz => { - let orig_ty = ctx.input_ty(insn, 0); - - if isa_flags.use_bmi1() && (orig_ty == types::I32 || orig_ty == types::I64) { - // We can use a plain tzcnt instruction here. Note no special handling is required - // for zero inputs, because the machine instruction does what the CLIF expects for - // zero, i.e. it returns zero. - let src = input_to_reg_mem(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::unary_rm_r( - OperandSize::from_ty(orig_ty), - UnaryRmROpcode::Tzcnt, - src, - dst, - )); - return Ok(()); - } - - // General formula using bit-scan forward (BSF): - // bsf %src, %dst - // mov $(size_bits), %tmp - // cmovz %tmp, %dst - if orig_ty == types::I128 { - // ctz src_lo, dst - // ctz src_hi, tmp1 - // add tmp1, 64 - // cmp dst, 64 - // cmovz tmp1, dst - let dsts = get_output_reg(ctx, outputs[0]); - let dst = dsts.regs()[0]; - let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let srcs = put_input_in_regs(ctx, inputs[0]); - let src_lo = srcs.regs()[0]; - let src_hi = srcs.regs()[1]; - emit_ctz(ctx, types::I64, types::I64, src_lo, dst); - emit_ctz(ctx, types::I64, types::I64, src_hi, tmp1); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Add, - RegMemImm::imm(64), - tmp1, - )); - ctx.emit(Inst::cmp_rmi_r( - OperandSize::Size64, - RegMemImm::imm(64), - dst.to_reg(), - )); 
- ctx.emit(Inst::cmove( - OperandSize::Size64, - CC::Z, - RegMem::reg(tmp1.to_reg()), - dst, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Xor, - RegMemImm::reg(dsts.regs()[1].to_reg()), - dsts.regs()[1], - )); - } else { - let ty = if orig_ty.bits() < 32 { - types::I32 - } else { - orig_ty - }; - debug_assert!(ty == types::I32 || ty == types::I64); - - let src = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - emit_ctz(ctx, orig_ty, ty, src, dst); - } - } - - Opcode::Popcnt => { - let ty_tmp = ty.unwrap(); - if !ty_tmp.is_vector() { - let ty = ctx.input_ty(insn, 0); - - if isa_flags.use_popcnt() { - match ty { - types::I8 | types::I16 => { - let src = RegMem::reg(extend_input_to_reg( - ctx, - inputs[0], - ExtSpec::ZeroExtendTo32, - )); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::unary_rm_r( - OperandSize::from_ty(types::I32), - UnaryRmROpcode::Popcnt, - src, - dst, - )); - return Ok(()); - } - types::I32 | types::I64 => { - let src = input_to_reg_mem(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::unary_rm_r( - OperandSize::from_ty(ty), - UnaryRmROpcode::Popcnt, - src, - dst, - )); - return Ok(()); - } - - types::I128 => { - // The number of ones in a 128-bits value is the plain sum of the number of - // ones in its low and high parts. No risk of overflow here. 
- let dsts = get_output_reg(ctx, outputs[0]); - let dst = dsts.regs()[0]; - let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let srcs = put_input_in_regs(ctx, inputs[0]); - let src_lo = srcs.regs()[0]; - let src_hi = srcs.regs()[1]; - - ctx.emit(Inst::unary_rm_r( - OperandSize::Size64, - UnaryRmROpcode::Popcnt, - RegMem::reg(src_lo), - dst, - )); - ctx.emit(Inst::unary_rm_r( - OperandSize::Size64, - UnaryRmROpcode::Popcnt, - RegMem::reg(src_hi), - tmp, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Add, - RegMemImm::reg(tmp.to_reg()), - dst, - )); - - // Zero the result's high component. - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Xor, - RegMemImm::reg(dsts.regs()[1].to_reg()), - dsts.regs()[1], - )); - - return Ok(()); - } - _ => {} - } - } - - let (ext_spec, ty) = match ty { - types::I8 | types::I16 => (Some(ExtSpec::ZeroExtendTo32), types::I32), - a if a == types::I32 || a == types::I64 || a == types::I128 => (None, a), - _ => unreachable!(), - }; - - let (srcs, ty): (SmallVec<[RegMem; 2]>, Type) = if let Some(ext_spec) = ext_spec { - ( - smallvec![RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))], - ty, - ) - } else if ty == types::I128 { - let regs = put_input_in_regs(ctx, inputs[0]); - ( - smallvec![RegMem::reg(regs.regs()[0]), RegMem::reg(regs.regs()[1])], - types::I64, - ) - } else { - // N.B.: explicitly put input in a reg here because the width of the instruction - // into which this RM op goes may not match the width of the input type (in fact, - // it won't for i32.popcnt), and we don't want a larger than necessary load. 
- (smallvec![RegMem::reg(put_input_in_reg(ctx, inputs[0]))], ty) - }; - - let mut dsts: SmallVec<[Reg; 2]> = smallvec![]; - for src in srcs { - let dst = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - dsts.push(dst.to_reg()); - if ty == types::I64 { - let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let cst = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - - // mov src, tmp1 - ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1)); - - // shr $1, tmp1 - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(1), - tmp1, - )); - - // mov 0x7777_7777_7777_7777, cst - ctx.emit(Inst::imm(OperandSize::Size64, 0x7777777777777777, cst)); - - // andq cst, tmp1 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(cst.to_reg()), - tmp1, - )); - - // mov src, tmp2 - ctx.emit(Inst::mov64_rm_r(src, tmp2)); - - // sub tmp1, tmp2 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Sub, - RegMemImm::reg(tmp1.to_reg()), - tmp2, - )); - - // shr $1, tmp1 - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(1), - tmp1, - )); - - // and cst, tmp1 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(cst.to_reg()), - tmp1, - )); - - // sub tmp1, tmp2 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Sub, - RegMemImm::reg(tmp1.to_reg()), - tmp2, - )); - - // shr $1, tmp1 - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(1), - tmp1, - )); - - // and cst, tmp1 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(cst.to_reg()), - tmp1, - )); - - // sub tmp1, tmp2 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Sub, - RegMemImm::reg(tmp1.to_reg()), - tmp2, - )); - - // mov tmp2, dst - ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst)); - - // shr $4, dst - 
ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(4), - dst, - )); - - // add tmp2, dst - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Add, - RegMemImm::reg(tmp2.to_reg()), - dst, - )); - - // mov $0x0F0F_0F0F_0F0F_0F0F, cst - ctx.emit(Inst::imm(OperandSize::Size64, 0x0F0F0F0F0F0F0F0F, cst)); - - // and cst, dst - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(cst.to_reg()), - dst, - )); - - // mov $0x0101_0101_0101_0101, cst - ctx.emit(Inst::imm(OperandSize::Size64, 0x0101010101010101, cst)); - - // mul cst, dst - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Mul, - RegMemImm::reg(cst.to_reg()), - dst, - )); - - // shr $56, dst - ctx.emit(Inst::shift_r( - OperandSize::Size64, - ShiftKind::ShiftRightLogical, - Some(56), - dst, - )); - } else { - assert_eq!(ty, types::I32); - - let tmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let tmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - - // mov src, tmp1 - ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1)); - - // shr $1, tmp1 - ctx.emit(Inst::shift_r( - OperandSize::Size32, - ShiftKind::ShiftRightLogical, - Some(1), - tmp1, - )); - - // andq $0x7777_7777, tmp1 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::And, - RegMemImm::imm(0x77777777), - tmp1, - )); - - // mov src, tmp2 - ctx.emit(Inst::mov64_rm_r(src, tmp2)); - - // sub tmp1, tmp2 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::Sub, - RegMemImm::reg(tmp1.to_reg()), - tmp2, - )); - - // shr $1, tmp1 - ctx.emit(Inst::shift_r( - OperandSize::Size32, - ShiftKind::ShiftRightLogical, - Some(1), - tmp1, - )); - - // and 0x7777_7777, tmp1 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::And, - RegMemImm::imm(0x77777777), - tmp1, - )); - - // sub tmp1, tmp2 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::Sub, - RegMemImm::reg(tmp1.to_reg()), - tmp2, - )); - - // shr 
$1, tmp1 - ctx.emit(Inst::shift_r( - OperandSize::Size32, - ShiftKind::ShiftRightLogical, - Some(1), - tmp1, - )); - - // and $0x7777_7777, tmp1 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::And, - RegMemImm::imm(0x77777777), - tmp1, - )); - - // sub tmp1, tmp2 - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::Sub, - RegMemImm::reg(tmp1.to_reg()), - tmp2, - )); - - // mov tmp2, dst - ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst)); - - // shr $4, dst - ctx.emit(Inst::shift_r( - OperandSize::Size32, - ShiftKind::ShiftRightLogical, - Some(4), - dst, - )); - - // add tmp2, dst - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::Add, - RegMemImm::reg(tmp2.to_reg()), - dst, - )); - - // and $0x0F0F_0F0F, dst - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::And, - RegMemImm::imm(0x0F0F0F0F), - dst, - )); - - // mul $0x0101_0101, dst - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size32, - AluRmiROpcode::Mul, - RegMemImm::imm(0x01010101), - dst, - )); - - // shr $24, dst - ctx.emit(Inst::shift_r( - OperandSize::Size32, - ShiftKind::ShiftRightLogical, - Some(24), - dst, - )); - } - } - - if dsts.len() == 1 { - let final_dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::gen_move(final_dst, dsts[0], types::I64)); - } else { - assert!(dsts.len() == 2); - let final_dst = get_output_reg(ctx, outputs[0]); - ctx.emit(Inst::gen_move(final_dst.regs()[0], dsts[0], types::I64)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Add, - RegMemImm::reg(dsts[1]), - final_dst.regs()[0], - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Xor, - RegMemImm::reg(final_dst.regs()[1].to_reg()), - final_dst.regs()[1], - )); - } - } else { - // Lower `popcount` for vectors. 
- let ty = ty.unwrap(); - let src = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - - if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512bitalg_simd() { - // When AVX512VL and AVX512BITALG are available, - // `popcnt.i8x16` can be lowered to a single instruction. - assert_eq!(ty, types::I8X16); - ctx.emit(Inst::xmm_unary_rm_r_evex( - Avx512Opcode::Vpopcntb, - RegMem::reg(src), - dst, - )); - } else { - // For SIMD 4.4 we use Mula's algorithm (https://arxiv.org/pdf/1611.07612.pdf) - // - //__m128i count_bytes ( __m128i v) { - // __m128i lookup = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); - // __m128i low_mask = _mm_set1_epi8 (0x0f); - // __m128i lo = _mm_and_si128 (v, low_mask); - // __m128i hi = _mm_and_si128 (_mm_srli_epi16 (v, 4), low_mask); - // __m128i cnt1 = _mm_shuffle_epi8 (lookup, lo); - // __m128i cnt2 = _mm_shuffle_epi8 (lookup, hi); - // return _mm_add_epi8 (cnt1, cnt2); - //} - // - // Details of the above algorithm can be found in the reference noted above, but the basics - // are to create a lookup table that pre populates the popcnt values for each number [0,15]. - // The algorithm uses shifts to isolate 4 bit sections of the vector, pshufb as part of the - // lookup process, and adds together the results. - - // __m128i lookup = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4); - static POPCOUNT_4BIT: [u8; 16] = [ - 0x00, 0x01, 0x01, 0x02, 0x01, 0x02, 0x02, 0x03, 0x01, 0x02, 0x02, 0x03, - 0x02, 0x03, 0x03, 0x04, - ]; - let lookup = ctx.use_constant(VCodeConstantData::WellKnown(&POPCOUNT_4BIT)); - - // Create a mask for lower 4bits of each subword. 
- static LOW_MASK: [u8; 16] = [0x0F; 16]; - let low_mask_const = ctx.use_constant(VCodeConstantData::WellKnown(&LOW_MASK)); - let low_mask = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const(low_mask_const, low_mask, ty)); - - // __m128i lo = _mm_and_si128 (v, low_mask); - let lo = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); - ctx.emit(Inst::gen_move(lo, low_mask.to_reg(), types::I8X16)); - ctx.emit(Inst::xmm_rm_r(SseOpcode::Pand, RegMem::reg(src), lo)); - - // __m128i hi = _mm_and_si128 (_mm_srli_epi16 (v, 4), low_mask); - ctx.emit(Inst::gen_move(dst, src, ty)); - ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrlw, RegMemImm::imm(4), dst)); - let tmp = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); - ctx.emit(Inst::gen_move(tmp, low_mask.to_reg(), types::I8X16)); - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Pand, - RegMem::reg(dst.to_reg()), - tmp, - )); - - // __m128i cnt1 = _mm_shuffle_epi8 (lookup, lo); - let tmp2 = ctx.alloc_tmp(types::I8X16).only_reg().unwrap(); - ctx.emit(Inst::xmm_load_const(lookup, tmp2, ty)); - ctx.emit(Inst::gen_move(dst, tmp2.to_reg(), types::I8X16)); - - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Pshufb, - RegMem::reg(lo.to_reg()), - dst, - )); - - // __m128i cnt2 = _mm_shuffle_epi8 (lookup , hi) ; - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Pshufb, - RegMem::reg(tmp.to_reg()), - tmp2, - )); - - // return _mm_add_epi8 (cnt1 , cnt2 ) ; - ctx.emit(Inst::xmm_rm_r( - SseOpcode::Paddb, - RegMem::reg(tmp2.to_reg()), - dst, - )); - } - } - } - - Opcode::Bitrev => { - let ty = ctx.input_ty(insn, 0); - assert!( - ty == types::I8 - || ty == types::I16 - || ty == types::I32 - || ty == types::I64 - || ty == types::I128 - ); - - if ty == types::I128 { - let src = put_input_in_regs(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]); - emit_bitrev(ctx, src.regs()[0], dst.regs()[1], types::I64); - emit_bitrev(ctx, src.regs()[1], dst.regs()[0], types::I64); - } else { - let src = put_input_in_reg(ctx, inputs[0]); - let dst = 
get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - emit_bitrev(ctx, src, dst, ty); - } - } - - Opcode::IsNull | Opcode::IsInvalid => { - // Null references are represented by the constant value 0; invalid references are - // represented by the constant value -1. See `define_reftypes()` in - // `meta/src/isa/x86/encodings.rs` to confirm. - let src = put_input_in_reg(ctx, inputs[0]); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let ty = ctx.input_ty(insn, 0); - let imm = match op { - Opcode::IsNull => { - // TODO could use tst src, src for IsNull - 0 - } - Opcode::IsInvalid => { - // We can do a 32-bit comparison even in 64-bits mode, as the constant is then - // sign-extended. - 0xffffffff - } - _ => unreachable!(), - }; - ctx.emit(Inst::cmp_rmi_r( - OperandSize::from_ty(ty), - RegMemImm::imm(imm), - src, - )); - ctx.emit(Inst::setcc(CC::Z, dst)); - } + | Opcode::ResumableTrap + | Opcode::Clz + | Opcode::Ctz + | Opcode::Popcnt + | Opcode::Bitrev + | Opcode::IsNull + | Opcode::IsInvalid => implemented_in_isle(ctx), Opcode::Uextend | Opcode::Sextend | Opcode::Breduce | Opcode::Bextend | Opcode::Ireduce => { let src_ty = ctx.input_ty(insn, 0); diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index a0ae8be657..c20bf3b1e1 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -171,6 +171,42 @@ where } } + #[inline] + fn avx512bitalg_enabled(&mut self, _: Type) -> Option<()> { + if self.isa_flags.use_avx512bitalg_simd() { + Some(()) + } else { + None + } + } + + #[inline] + fn use_lzcnt(&mut self, _: Type) -> Option<()> { + if self.isa_flags.use_lzcnt() { + Some(()) + } else { + None + } + } + + #[inline] + fn use_bmi1(&mut self, _: Type) -> Option<()> { + if self.isa_flags.use_bmi1() { + Some(()) + } else { + None + } + } + + #[inline] + fn use_popcnt(&mut self, _: Type) -> Option<()> { + if self.isa_flags.use_popcnt() { + Some(()) + } 
else { + None + } + } + #[inline] fn imm8_from_value(&mut self, val: Value) -> Option { let inst = self.lower_ctx.dfg().value_def(val).inst()?; @@ -326,6 +362,16 @@ where SyntheticAmode::ConstantOffset(mask_table) } + fn popcount_4bit_table(&mut self) -> VCodeConstant { + self.lower_ctx + .use_constant(VCodeConstantData::WellKnown(&POPCOUNT_4BIT_TABLE)) + } + + fn popcount_low_mask(&mut self) -> VCodeConstant { + self.lower_ctx + .use_constant(VCodeConstantData::WellKnown(&POPCOUNT_LOW_MASK)) + } + #[inline] fn writable_reg_to_xmm(&mut self, r: WritableReg) -> WritableXmm { Writable::from_reg(Xmm::new(r.to_reg()).unwrap()) @@ -499,6 +545,18 @@ const I8X16_USHR_MASKS: [u8; 128] = [ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, ]; +/// Number of bits set in a given nibble (4-bit value). Used in the +/// vector implementation of popcount. +#[rustfmt::skip] // Preserve 4x4 layout. +const POPCOUNT_4BIT_TABLE: [u8; 16] = [ + 0x00, 0x01, 0x01, 0x02, + 0x01, 0x02, 0x02, 0x03, + 0x01, 0x02, 0x02, 0x03, + 0x02, 0x03, 0x03, 0x04, +]; + +const POPCOUNT_LOW_MASK: [u8; 16] = [0x0f; 16]; + #[inline] fn to_simm32(constant: i64) -> Option { if constant == ((constant << 32) >> 32) { diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest index 3cef290a61..99952d3c15 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 9ea75a6f790b5c03 -src/prelude.isle 9830498351ddf6a3 -src/isa/x64/inst.isle 5ee89205e6e9a46b -src/isa/x64/lower.isle 348a808ea5de4cdb +src/prelude.isle 6b0160bfcac86902 +src/isa/x64/inst.isle 67eb719e568c2a81 +src/isa/x64/lower.isle 142626fe062fd7d7 diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs index 225f985bbb..8ffdff98a5 
100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs @@ -39,8 +39,14 @@ pub trait Context { fn u8_as_u64(&mut self, arg0: u8) -> u64; fn u16_as_u64(&mut self, arg0: u16) -> u64; fn u32_as_u64(&mut self, arg0: u32) -> u64; + fn i64_as_u64(&mut self, arg0: i64) -> u64; + fn u64_add(&mut self, arg0: u64, arg1: u64) -> u64; + fn u64_sub(&mut self, arg0: u64, arg1: u64) -> u64; + fn u64_and(&mut self, arg0: u64, arg1: u64) -> u64; fn ty_bits(&mut self, arg0: Type) -> u8; fn ty_bits_u16(&mut self, arg0: Type) -> u16; + fn ty_bits_u64(&mut self, arg0: Type) -> u64; + fn ty_mask(&mut self, arg0: Type) -> u64; fn ty_bytes(&mut self, arg0: Type) -> u16; fn lane_type(&mut self, arg0: Type) -> Type; fn fits_in_16(&mut self, arg0: Type) -> Option; @@ -120,6 +126,10 @@ pub trait Context { fn avx512vl_enabled(&mut self, arg0: Type) -> Option<()>; fn avx512dq_enabled(&mut self, arg0: Type) -> Option<()>; fn avx512f_enabled(&mut self, arg0: Type) -> Option<()>; + fn avx512bitalg_enabled(&mut self, arg0: Type) -> Option<()>; + fn use_lzcnt(&mut self, arg0: Type) -> Option<()>; + fn use_bmi1(&mut self, arg0: Type) -> Option<()>; + fn use_popcnt(&mut self, arg0: Type) -> Option<()>; fn imm8_from_value(&mut self, arg0: Value) -> Option; fn const_to_type_masked_imm8(&mut self, arg0: u64, arg1: Type) -> Imm8Gpr; fn simm32_from_value(&mut self, arg0: Value) -> Option; @@ -133,15 +143,17 @@ pub trait Context { fn ushr_i8x16_mask_for_const(&mut self, arg0: u32) -> SyntheticAmode; fn ushr_i8x16_mask_table(&mut self) -> SyntheticAmode; fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8; + fn popcount_4bit_table(&mut self) -> VCodeConstant; + fn popcount_low_mask(&mut self) -> VCodeConstant; } -/// Internal type SideEffectNoResult: defined at src/prelude.isle line 363. +/// Internal type SideEffectNoResult: defined at src/prelude.isle line 385. 
#[derive(Clone, Debug)] pub enum SideEffectNoResult { Inst { inst: MInst }, } -/// Internal type ProducesFlags: defined at src/prelude.isle line 385. +/// Internal type ProducesFlags: defined at src/prelude.isle line 407. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlagsSideEffect { inst: MInst }, @@ -149,7 +161,7 @@ pub enum ProducesFlags { ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 396. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 418. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlagsReturnsResultWithProducer { @@ -165,6 +177,13 @@ pub enum ConsumesFlags { inst2: MInst, result: ValueRegs, }, + ConsumesFlagsFourTimesReturnsValueRegs { + inst1: MInst, + inst2: MInst, + inst3: MInst, + inst4: MInst, + result: ValueRegs, + }, } /// Internal type MInst: defined at src/isa/x64/inst.isle line 8. @@ -289,14 +308,6 @@ pub enum MInst { alternative: Gpr, dst: WritableGpr, }, - CmoveOr { - size: OperandSize, - cc1: CC, - cc2: CC, - consequent: GprMem, - alternative: Gpr, - dst: WritableGpr, - }, XmmCmove { size: OperandSize, cc: CC, @@ -304,14 +315,6 @@ pub enum MInst { alternative: Xmm, dst: WritableXmm, }, - XmmCmoveOr { - size: OperandSize, - cc1: CC, - cc2: CC, - consequent: XmmMem, - alternative: Xmm, - dst: WritableXmm, - }, Push64 { src: GprMemImm, }, @@ -495,7 +498,7 @@ pub enum MInst { }, } -/// Internal type ExtendKind: defined at src/isa/x64/inst.isle line 1125. +/// Internal type ExtendKind: defined at src/isa/x64/inst.isle line 1119. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum ExtendKind { Sign, @@ -549,7 +552,7 @@ pub fn constructor_side_effect( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 368. + // Rule at src/prelude.isle line 390. 
let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -567,7 +570,7 @@ pub fn constructor_safepoint( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 374. + // Rule at src/prelude.isle line 396. let expr0_0 = C::emit_safepoint(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -575,6 +578,55 @@ pub fn constructor_safepoint( return None; } +// Generated as internal constructor for term produces_flags_get_reg. +pub fn constructor_produces_flags_get_reg( + ctx: &mut C, + arg0: &ProducesFlags, +) -> Option { + let pattern0_0 = arg0; + if let &ProducesFlags::ProducesFlagsReturnsReg { + inst: ref pattern1_0, + result: pattern1_1, + } = pattern0_0 + { + // Rule at src/prelude.isle line 434. + return Some(pattern1_1); + } + return None; +} + +// Generated as internal constructor for term produces_flags_ignore. +pub fn constructor_produces_flags_ignore( + ctx: &mut C, + arg0: &ProducesFlags, +) -> Option { + let pattern0_0 = arg0; + match pattern0_0 { + &ProducesFlags::ProducesFlagsReturnsReg { + inst: ref pattern1_0, + result: pattern1_1, + } => { + // Rule at src/prelude.isle line 439. + let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { + inst: pattern1_0.clone(), + }; + return Some(expr0_0); + } + &ProducesFlags::ProducesFlagsReturnsResultWithConsumer { + inst: ref pattern1_0, + result: pattern1_1, + } => { + // Rule at src/prelude.isle line 441. + let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { + inst: pattern1_0.clone(), + }; + return Some(expr0_0); + } + _ => {} + } + return None; +} + // Generated as internal constructor for term consumes_flags_concat. pub fn constructor_consumes_flags_concat( ctx: &mut C, @@ -593,7 +645,7 @@ pub fn constructor_consumes_flags_concat( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 408. + // Rule at src/prelude.isle line 448. 
let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { inst1: pattern1_0.clone(), @@ -623,7 +675,7 @@ pub fn constructor_with_flags( inst: ref pattern3_0, result: pattern3_1, } => { - // Rule at src/prelude.isle line 433. + // Rule at src/prelude.isle line 473. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_reg(ctx, pattern3_1); @@ -634,12 +686,27 @@ pub fn constructor_with_flags( inst2: ref pattern3_1, result: pattern3_2, } => { - // Rule at src/prelude.isle line 439. + // Rule at src/prelude.isle line 479. let expr0_0 = C::emit(ctx, pattern1_0); - let expr1_0 = C::emit(ctx, pattern3_1); - let expr2_0 = C::emit(ctx, pattern3_0); + let expr1_0 = C::emit(ctx, pattern3_0); + let expr2_0 = C::emit(ctx, pattern3_1); return Some(pattern3_2); } + &ConsumesFlags::ConsumesFlagsFourTimesReturnsValueRegs { + inst1: ref pattern3_0, + inst2: ref pattern3_1, + inst3: ref pattern3_2, + inst4: ref pattern3_3, + result: pattern3_4, + } => { + // Rule at src/prelude.isle line 491. + let expr0_0 = C::emit(ctx, pattern1_0); + let expr1_0 = C::emit(ctx, pattern3_0); + let expr2_0 = C::emit(ctx, pattern3_1); + let expr3_0 = C::emit(ctx, pattern3_2); + let expr4_0 = C::emit(ctx, pattern3_3); + return Some(pattern3_4); + } _ => {} } } @@ -653,7 +720,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 427. + // Rule at src/prelude.isle line 467. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -673,7 +740,7 @@ pub fn constructor_with_flags_reg( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/prelude.isle line 452. + // Rule at src/prelude.isle line 508. 
let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -685,22 +752,22 @@ pub fn constructor_operand_size_bits(ctx: &mut C, arg0: &OperandSize let pattern0_0 = arg0; match pattern0_0 { &OperandSize::Size8 => { - // Rule at src/isa/x64/inst.isle line 527. + // Rule at src/isa/x64/inst.isle line 509. let expr0_0: u16 = 8; return Some(expr0_0); } &OperandSize::Size16 => { - // Rule at src/isa/x64/inst.isle line 528. + // Rule at src/isa/x64/inst.isle line 510. let expr0_0: u16 = 16; return Some(expr0_0); } &OperandSize::Size32 => { - // Rule at src/isa/x64/inst.isle line 529. + // Rule at src/isa/x64/inst.isle line 511. let expr0_0: u16 = 32; return Some(expr0_0); } &OperandSize::Size64 => { - // Rule at src/isa/x64/inst.isle line 530. + // Rule at src/isa/x64/inst.isle line 512. let expr0_0: u16 = 64; return Some(expr0_0); } @@ -712,7 +779,7 @@ pub fn constructor_operand_size_bits(ctx: &mut C, arg0: &OperandSize // Generated as internal constructor for term reg_to_gpr_mem_imm. pub fn constructor_reg_to_gpr_mem_imm(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 967. + // Rule at src/isa/x64/inst.isle line 949. let expr0_0 = C::gpr_new(ctx, pattern0_0); let expr1_0 = C::gpr_to_gpr_mem_imm(ctx, expr0_0); return Some(expr1_0); @@ -721,7 +788,7 @@ pub fn constructor_reg_to_gpr_mem_imm(ctx: &mut C, arg0: Reg) -> Opt // Generated as internal constructor for term put_in_gpr. pub fn constructor_put_in_gpr(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 974. + // Rule at src/isa/x64/inst.isle line 956. 
let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0 = C::gpr_new(ctx, expr0_0); return Some(expr1_0); @@ -730,7 +797,7 @@ pub fn constructor_put_in_gpr(ctx: &mut C, arg0: Value) -> Option(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 981. + // Rule at src/isa/x64/inst.isle line 963. let expr0_0 = C::put_in_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_gpr_mem(ctx, &expr0_0); return Some(expr1_0); @@ -739,7 +806,7 @@ pub fn constructor_put_in_gpr_mem(ctx: &mut C, arg0: Value) -> Optio // Generated as internal constructor for term put_in_gpr_mem_imm. pub fn constructor_put_in_gpr_mem_imm(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 988. + // Rule at src/isa/x64/inst.isle line 970. let expr0_0 = C::put_in_reg_mem_imm(ctx, pattern0_0); let expr1_0 = C::gpr_mem_imm_new(ctx, &expr0_0); return Some(expr1_0); @@ -748,7 +815,7 @@ pub fn constructor_put_in_gpr_mem_imm(ctx: &mut C, arg0: Value) -> O // Generated as internal constructor for term put_in_xmm. pub fn constructor_put_in_xmm(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 995. + // Rule at src/isa/x64/inst.isle line 977. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0 = C::xmm_new(ctx, expr0_0); return Some(expr1_0); @@ -757,7 +824,7 @@ pub fn constructor_put_in_xmm(ctx: &mut C, arg0: Value) -> Option(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1002. + // Rule at src/isa/x64/inst.isle line 984. let expr0_0 = C::put_in_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_xmm_mem(ctx, &expr0_0); return Some(expr1_0); @@ -766,7 +833,7 @@ pub fn constructor_put_in_xmm_mem(ctx: &mut C, arg0: Value) -> Optio // Generated as internal constructor for term put_in_xmm_mem_imm. 
pub fn constructor_put_in_xmm_mem_imm(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1009. + // Rule at src/isa/x64/inst.isle line 991. let expr0_0 = C::put_in_reg_mem_imm(ctx, pattern0_0); let expr1_0 = C::xmm_mem_imm_new(ctx, &expr0_0); return Some(expr1_0); @@ -775,7 +842,7 @@ pub fn constructor_put_in_xmm_mem_imm(ctx: &mut C, arg0: Value) -> O // Generated as internal constructor for term output_gpr. pub fn constructor_output_gpr(ctx: &mut C, arg0: Gpr) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1014. + // Rule at src/isa/x64/inst.isle line 996. let expr0_0 = C::gpr_to_reg(ctx, pattern0_0); let expr1_0 = constructor_output_reg(ctx, expr0_0)?; return Some(expr1_0); @@ -785,7 +852,7 @@ pub fn constructor_output_gpr(ctx: &mut C, arg0: Gpr) -> Option(ctx: &mut C, arg0: Gpr, arg1: Gpr) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1019. + // Rule at src/isa/x64/inst.isle line 1001. let expr0_0 = C::gpr_to_reg(ctx, pattern0_0); let expr1_0 = C::gpr_to_reg(ctx, pattern1_0); let expr2_0 = C::value_regs(ctx, expr0_0, expr1_0); @@ -795,7 +862,7 @@ pub fn constructor_value_gprs(ctx: &mut C, arg0: Gpr, arg1: Gpr) -> // Generated as internal constructor for term output_xmm. pub fn constructor_output_xmm(ctx: &mut C, arg0: Xmm) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1024. + // Rule at src/isa/x64/inst.isle line 1006. let expr0_0 = C::xmm_to_reg(ctx, pattern0_0); let expr1_0 = constructor_output_reg(ctx, expr0_0)?; return Some(expr1_0); @@ -809,7 +876,7 @@ pub fn constructor_value_regs_get_gpr( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1031. + // Rule at src/isa/x64/inst.isle line 1013. 
let expr0_0 = C::value_regs_get(ctx, pattern0_0, pattern1_0); let expr1_0 = C::gpr_new(ctx, expr0_0); return Some(expr1_0); @@ -818,7 +885,7 @@ pub fn constructor_value_regs_get_gpr( // Generated as internal constructor for term lo_gpr. pub fn constructor_lo_gpr(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1044. + // Rule at src/isa/x64/inst.isle line 1026. let expr0_0 = constructor_lo_reg(ctx, pattern0_0)?; let expr1_0 = C::gpr_new(ctx, expr0_0); return Some(expr1_0); @@ -830,7 +897,7 @@ pub fn constructor_sink_load_to_gpr_mem_imm( arg0: &SinkableLoad, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1115. + // Rule at src/isa/x64/inst.isle line 1109. let expr0_0 = C::sink_load(ctx, pattern0_0); let expr1_0 = C::gpr_mem_imm_new(ctx, &expr0_0); return Some(expr1_0); @@ -848,12 +915,12 @@ pub fn constructor_extend_to_gpr( let pattern2_0 = arg1; if pattern2_0 == pattern1_0 { let pattern4_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1137. + // Rule at src/isa/x64/inst.isle line 1131. let expr0_0 = constructor_put_in_gpr(ctx, pattern0_0)?; return Some(expr0_0); } let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1140. + // Rule at src/isa/x64/inst.isle line 1134. let expr0_0 = C::ty_bits_u16(ctx, pattern1_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern2_0); let expr2_0 = constructor_operand_size_bits(ctx, &expr1_0)?; @@ -877,7 +944,7 @@ pub fn constructor_extend( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1160. + // Rule at src/isa/x64/inst.isle line 1154. let expr0_0 = constructor_movsx(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -885,7 +952,7 @@ pub fn constructor_extend( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1156. + // Rule at src/isa/x64/inst.isle line 1150. 
let expr0_0 = constructor_movzx(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -898,17 +965,17 @@ pub fn constructor_extend( pub fn constructor_sse_xor_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 1167. + // Rule at src/isa/x64/inst.isle line 1161. let expr0_0 = SseOpcode::Xorps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 1168. + // Rule at src/isa/x64/inst.isle line 1162. let expr0_0 = SseOpcode::Xorpd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { - // Rule at src/isa/x64/inst.isle line 1169. + // Rule at src/isa/x64/inst.isle line 1163. let expr0_0 = SseOpcode::Pxor; return Some(expr0_0); } @@ -925,7 +992,7 @@ pub fn constructor_sse_xor( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1173. + // Rule at src/isa/x64/inst.isle line 1167. let expr0_0 = constructor_sse_xor_op(ctx, pattern0_0)?; let expr1_0 = constructor_xmm_rm_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -935,40 +1002,40 @@ pub fn constructor_sse_xor( pub fn constructor_sse_cmp_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 1182. + // Rule at src/isa/x64/inst.isle line 1176. let expr0_0 = SseOpcode::Cmpps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 1183. + // Rule at src/isa/x64/inst.isle line 1177. let expr0_0 = SseOpcode::Cmppd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { if pattern1_0 == 8 { if pattern1_1 == 16 { - // Rule at src/isa/x64/inst.isle line 1178. + // Rule at src/isa/x64/inst.isle line 1172. 
let expr0_0 = SseOpcode::Pcmpeqb; return Some(expr0_0); } } if pattern1_0 == 16 { if pattern1_1 == 8 { - // Rule at src/isa/x64/inst.isle line 1179. + // Rule at src/isa/x64/inst.isle line 1173. let expr0_0 = SseOpcode::Pcmpeqw; return Some(expr0_0); } } if pattern1_0 == 32 { if pattern1_1 == 4 { - // Rule at src/isa/x64/inst.isle line 1180. + // Rule at src/isa/x64/inst.isle line 1174. let expr0_0 = SseOpcode::Pcmpeqd; return Some(expr0_0); } } if pattern1_0 == 64 { if pattern1_1 == 2 { - // Rule at src/isa/x64/inst.isle line 1181. + // Rule at src/isa/x64/inst.isle line 1175. let expr0_0 = SseOpcode::Pcmpeqq; return Some(expr0_0); } @@ -980,7 +1047,7 @@ pub fn constructor_sse_cmp_op(ctx: &mut C, arg0: Type) -> Option(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1197. + // Rule at src/isa/x64/inst.isle line 1191. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0: Type = I32X4; let expr2_0 = constructor_sse_cmp_op(ctx, expr1_0)?; @@ -1005,7 +1072,7 @@ pub fn constructor_make_i64x2_from_lanes( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1207. + // Rule at src/isa/x64/inst.isle line 1201. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_reg(ctx, expr0_0); let expr2_0 = MInst::XmmUninitializedValue { dst: expr0_0 }; @@ -1047,12 +1114,12 @@ pub fn constructor_mov_rmi_to_xmm(ctx: &mut C, arg0: &RegMemImm) -> let pattern0_0 = arg0; match pattern0_0 { &RegMemImm::Imm { simm32: pattern1_0 } => { - // Rule at src/isa/x64/inst.isle line 1228. + // Rule at src/isa/x64/inst.isle line 1222. let expr0_0 = C::xmm_mem_imm_new(ctx, pattern0_0); return Some(expr0_0); } &RegMemImm::Reg { reg: pattern1_0 } => { - // Rule at src/isa/x64/inst.isle line 1229. + // Rule at src/isa/x64/inst.isle line 1223. 
let expr0_0 = SseOpcode::Movd; let expr1_0 = C::reg_to_gpr_mem(ctx, pattern1_0); let expr2_0 = OperandSize::Size32; @@ -1063,7 +1130,7 @@ pub fn constructor_mov_rmi_to_xmm(ctx: &mut C, arg0: &RegMemImm) -> &RegMemImm::Mem { addr: ref pattern1_0, } => { - // Rule at src/isa/x64/inst.isle line 1227. + // Rule at src/isa/x64/inst.isle line 1221. let expr0_0 = C::xmm_mem_imm_new(ctx, pattern0_0); return Some(expr0_0); } @@ -1083,7 +1150,7 @@ pub fn constructor_x64_load( if pattern0_0 == I64 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1244. + // Rule at src/isa/x64/inst.isle line 1238. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::Mov64MR { src: pattern2_0.clone(), @@ -1096,7 +1163,7 @@ pub fn constructor_x64_load( if pattern0_0 == F32 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1249. + // Rule at src/isa/x64/inst.isle line 1243. let expr0_0 = SseOpcode::Movss; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1106,7 +1173,7 @@ pub fn constructor_x64_load( if pattern0_0 == F64 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1253. + // Rule at src/isa/x64/inst.isle line 1247. let expr0_0 = SseOpcode::Movsd; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1116,7 +1183,7 @@ pub fn constructor_x64_load( if pattern0_0 == F32X4 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1257. + // Rule at src/isa/x64/inst.isle line 1251. 
let expr0_0 = SseOpcode::Movups; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1126,7 +1193,7 @@ pub fn constructor_x64_load( if pattern0_0 == F64X2 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1261. + // Rule at src/isa/x64/inst.isle line 1255. let expr0_0 = SseOpcode::Movupd; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1136,7 +1203,7 @@ pub fn constructor_x64_load( if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1265. + // Rule at src/isa/x64/inst.isle line 1259. let expr0_0 = SseOpcode::Movdqu; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1147,7 +1214,7 @@ pub fn constructor_x64_load( let pattern2_0 = arg1; let pattern3_0 = arg2; if let &ExtKind::SignExtend = pattern3_0 { - // Rule at src/isa/x64/inst.isle line 1239. + // Rule at src/isa/x64/inst.isle line 1233. let expr0_0 = C::ty_bytes(ctx, pattern1_0); let expr1_0: u16 = 8; let expr2_0 = C::ext_mode(ctx, expr0_0, expr1_0); @@ -1160,6 +1227,27 @@ pub fn constructor_x64_load( return None; } +// Generated as internal constructor for term xmm_load_const. +pub fn constructor_xmm_load_const( + ctx: &mut C, + arg0: Type, + arg1: VCodeConstant, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1265. 
+ let expr0_0 = C::temp_writable_xmm(ctx); + let expr1_0 = C::writable_xmm_to_reg(ctx, expr0_0); + let expr2_0 = MInst::XmmLoadConst { + src: pattern1_0, + dst: expr1_0, + ty: pattern0_0, + }; + let expr3_0 = C::emit(ctx, &expr2_0); + let expr4_0 = C::writable_xmm_to_xmm(ctx, expr0_0); + return Some(expr4_0); +} + // Generated as internal constructor for term alu_rmi_r. pub fn constructor_alu_rmi_r( ctx: &mut C, @@ -1172,7 +1260,7 @@ pub fn constructor_alu_rmi_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1277. + // Rule at src/isa/x64/inst.isle line 1278. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::AluRmiR { @@ -1197,7 +1285,7 @@ pub fn constructor_add( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1285. + // Rule at src/isa/x64/inst.isle line 1286. let expr0_0 = AluRmiROpcode::Add; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1213,7 +1301,7 @@ pub fn constructor_add_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1293. + // Rule at src/isa/x64/inst.isle line 1294. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Add; @@ -1242,7 +1330,7 @@ pub fn constructor_adc_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1305. + // Rule at src/isa/x64/inst.isle line 1306. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Adc; @@ -1271,7 +1359,7 @@ pub fn constructor_sub( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1317. 
+ // Rule at src/isa/x64/inst.isle line 1318. let expr0_0 = AluRmiROpcode::Sub; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1287,7 +1375,7 @@ pub fn constructor_sub_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1325. + // Rule at src/isa/x64/inst.isle line 1326. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Sub; @@ -1316,7 +1404,7 @@ pub fn constructor_sbb_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1337. + // Rule at src/isa/x64/inst.isle line 1338. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Sbb; @@ -1345,7 +1433,7 @@ pub fn constructor_mul( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1349. + // Rule at src/isa/x64/inst.isle line 1350. let expr0_0 = AluRmiROpcode::Mul; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1361,7 +1449,7 @@ pub fn constructor_x64_and( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1357. + // Rule at src/isa/x64/inst.isle line 1358. let expr0_0 = AluRmiROpcode::And; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1377,7 +1465,7 @@ pub fn constructor_or( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1365. + // Rule at src/isa/x64/inst.isle line 1366. 
let expr0_0 = AluRmiROpcode::Or; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1393,7 +1481,7 @@ pub fn constructor_xor( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1373. + // Rule at src/isa/x64/inst.isle line 1374. let expr0_0 = AluRmiROpcode::Xor; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1405,7 +1493,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == I64 { let pattern2_0 = arg1; if let Some(pattern3_0) = C::nonzero_u64_fits_in_u32(ctx, pattern2_0) { - // Rule at src/isa/x64/inst.isle line 1406. + // Rule at src/isa/x64/inst.isle line 1414. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = OperandSize::Size32; let expr2_0 = MInst::Imm { @@ -1421,7 +1509,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == F32 { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1435. + // Rule at src/isa/x64/inst.isle line 1443. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_xmm(ctx, expr0_0); let expr2_0 = SseOpcode::Xorps; @@ -1436,7 +1524,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr6_0 = C::xmm_to_reg(ctx, expr1_0); return Some(expr6_0); } - // Rule at src/isa/x64/inst.isle line 1390. + // Rule at src/isa/x64/inst.isle line 1391. let expr0_0 = SseOpcode::Movd; let expr1_0: Type = I32; let expr2_0 = constructor_imm(ctx, expr1_0, pattern2_0)?; @@ -1449,7 +1537,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == F64 { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1447. + // Rule at src/isa/x64/inst.isle line 1455. 
let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_xmm(ctx, expr0_0); let expr2_0 = SseOpcode::Xorpd; @@ -1464,7 +1552,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr6_0 = C::xmm_to_reg(ctx, expr1_0); return Some(expr6_0); } - // Rule at src/isa/x64/inst.isle line 1396. + // Rule at src/isa/x64/inst.isle line 1397. let expr0_0 = SseOpcode::Movq; let expr1_0: Type = I64; let expr2_0 = constructor_imm(ctx, expr1_0, pattern2_0)?; @@ -1477,7 +1565,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1425. + // Rule at src/isa/x64/inst.isle line 1433. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_xmm(ctx, expr0_0); let expr2_0 = constructor_sse_xor_op(ctx, pattern0_0)?; @@ -1496,7 +1584,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if let Some(pattern1_0) = C::fits_in_64(ctx, pattern0_0) { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1412. + // Rule at src/isa/x64/inst.isle line 1420. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::writable_gpr_to_gpr(ctx, expr0_0); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern1_0); @@ -1513,7 +1601,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr7_0 = C::gpr_to_reg(ctx, expr1_0); return Some(expr7_0); } - // Rule at src/isa/x64/inst.isle line 1383. + // Rule at src/isa/x64/inst.isle line 1384. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern1_0); let expr2_0 = MInst::Imm { @@ -1528,6 +1616,16 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option return None; } +// Generated as internal constructor for term imm_i64. 
+pub fn constructor_imm_i64(ctx: &mut C, arg0: Type, arg1: i64) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1406. + let expr0_0 = C::i64_as_u64(ctx, pattern1_0); + let expr1_0 = constructor_imm(ctx, pattern0_0, expr0_0)?; + return Some(expr1_0); +} + // Generated as internal constructor for term shift_r. pub fn constructor_shift_r( ctx: &mut C, @@ -1540,7 +1638,7 @@ pub fn constructor_shift_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1460. + // Rule at src/isa/x64/inst.isle line 1468. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::raw_operand_size_of_type(ctx, pattern0_0); let expr2_0 = MInst::ShiftR { @@ -1565,7 +1663,7 @@ pub fn constructor_x64_rotl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1470. + // Rule at src/isa/x64/inst.isle line 1478. let expr0_0 = ShiftKind::RotateLeft; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1581,7 +1679,7 @@ pub fn constructor_x64_rotr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1475. + // Rule at src/isa/x64/inst.isle line 1483. let expr0_0 = ShiftKind::RotateRight; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1597,7 +1695,7 @@ pub fn constructor_shl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1480. + // Rule at src/isa/x64/inst.isle line 1488. let expr0_0 = ShiftKind::ShiftLeft; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1613,7 +1711,7 @@ pub fn constructor_shr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1485. 
+ // Rule at src/isa/x64/inst.isle line 1493. let expr0_0 = ShiftKind::ShiftRightLogical; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1629,7 +1727,7 @@ pub fn constructor_sar( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1490. + // Rule at src/isa/x64/inst.isle line 1498. let expr0_0 = ShiftKind::ShiftRightArithmetic; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1647,7 +1745,7 @@ pub fn constructor_cmp_rmi_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1495. + // Rule at src/isa/x64/inst.isle line 1503. let expr0_0 = MInst::CmpRmiR { size: pattern0_0.clone(), opcode: pattern1_0.clone(), @@ -1668,12 +1766,30 @@ pub fn constructor_cmp( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1504. + // Rule at src/isa/x64/inst.isle line 1512. let expr0_0 = CmpOpcode::Cmp; let expr1_0 = constructor_cmp_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); } +// Generated as internal constructor for term cmp_imm. +pub fn constructor_cmp_imm( + ctx: &mut C, + arg0: &OperandSize, + arg1: u32, + arg2: Gpr, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/x64/inst.isle line 1517. + let expr0_0 = CmpOpcode::Cmp; + let expr1_0 = RegMemImm::Imm { simm32: pattern1_0 }; + let expr2_0 = C::gpr_mem_imm_new(ctx, &expr1_0); + let expr3_0 = constructor_cmp_rmi_r(ctx, pattern0_0, &expr0_0, &expr2_0, pattern2_0)?; + return Some(expr3_0); +} + // Generated as internal constructor for term xmm_cmp_rm_r. 
pub fn constructor_xmm_cmp_rm_r( ctx: &mut C, @@ -1684,7 +1800,7 @@ pub fn constructor_xmm_cmp_rm_r( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1509. + // Rule at src/isa/x64/inst.isle line 1522. let expr0_0 = MInst::XmmCmpRmR { op: pattern0_0.clone(), src: pattern1_0.clone(), @@ -1704,7 +1820,7 @@ pub fn constructor_fpcmp( let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == F32 { let pattern3_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1516. + // Rule at src/isa/x64/inst.isle line 1529. let expr0_0 = SseOpcode::Ucomiss; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern0_0)?; let expr2_0 = constructor_put_in_xmm(ctx, pattern3_0)?; @@ -1713,7 +1829,7 @@ pub fn constructor_fpcmp( } if pattern1_0 == F64 { let pattern3_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1518. + // Rule at src/isa/x64/inst.isle line 1531. let expr0_0 = SseOpcode::Ucomisd; let expr1_0 = constructor_put_in_xmm_mem(ctx, pattern0_0)?; let expr2_0 = constructor_put_in_xmm(ctx, pattern3_0)?; @@ -1733,7 +1849,7 @@ pub fn constructor_test( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1523. + // Rule at src/isa/x64/inst.isle line 1536. let expr0_0 = CmpOpcode::Test; let expr1_0 = constructor_cmp_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1751,7 +1867,7 @@ pub fn constructor_cmove( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1530. + // Rule at src/isa/x64/inst.isle line 1543. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Cmove { @@ -1781,7 +1897,7 @@ pub fn constructor_cmove_xmm( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1538. + // Rule at src/isa/x64/inst.isle line 1551. 
let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::XmmCmove { @@ -1812,7 +1928,7 @@ pub fn constructor_cmove_from_values( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1549. + // Rule at src/isa/x64/inst.isle line 1562. let expr0_0 = C::put_in_regs(ctx, pattern3_0); let expr1_0 = C::put_in_regs(ctx, pattern4_0); let expr2_0 = C::temp_writable_gpr(ctx); @@ -1857,7 +1973,7 @@ pub fn constructor_cmove_from_values( let pattern3_0 = arg1; let pattern4_0 = arg2; let pattern5_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1573. + // Rule at src/isa/x64/inst.isle line 1586. let expr0_0 = constructor_put_in_xmm_mem(ctx, pattern4_0)?; let expr1_0 = constructor_put_in_xmm(ctx, pattern5_0)?; let expr2_0 = constructor_cmove_xmm(ctx, pattern2_0, pattern3_0, &expr0_0, expr1_0)?; @@ -1869,7 +1985,7 @@ pub fn constructor_cmove_from_values( let pattern3_0 = arg1; let pattern4_0 = arg2; let pattern5_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1570. + // Rule at src/isa/x64/inst.isle line 1583. let expr0_0 = constructor_put_in_gpr_mem(ctx, pattern4_0)?; let expr1_0 = constructor_put_in_gpr(ctx, pattern5_0)?; let expr2_0 = constructor_cmove(ctx, pattern2_0, pattern3_0, &expr0_0, expr1_0)?; @@ -1893,23 +2009,32 @@ pub fn constructor_cmove_or( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1580. + // Rule at src/isa/x64/inst.isle line 1593. 
let expr0_0 = C::temp_writable_gpr(ctx); - let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); - let expr2_0 = MInst::CmoveOr { - size: expr1_0, - cc1: pattern1_0.clone(), - cc2: pattern2_0.clone(), + let expr1_0 = C::temp_writable_gpr(ctx); + let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); + let expr3_0 = MInst::Cmove { + size: expr2_0, + cc: pattern1_0.clone(), consequent: pattern3_0.clone(), alternative: pattern4_0, + dst: expr1_0, + }; + let expr4_0 = C::writable_gpr_to_gpr(ctx, expr1_0); + let expr5_0 = MInst::Cmove { + size: expr2_0, + cc: pattern2_0.clone(), + consequent: pattern3_0.clone(), + alternative: expr4_0, dst: expr0_0, }; - let expr3_0 = constructor_writable_gpr_to_r_reg(ctx, expr0_0)?; - let expr4_0 = ConsumesFlags::ConsumesFlagsReturnsReg { - inst: expr2_0, - result: expr3_0, + let expr6_0 = constructor_writable_gpr_to_value_regs(ctx, expr0_0)?; + let expr7_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { + inst1: expr3_0, + inst2: expr5_0, + result: expr6_0, }; - return Some(expr4_0); + return Some(expr7_0); } // Generated as internal constructor for term cmove_or_xmm. @@ -1926,23 +2051,32 @@ pub fn constructor_cmove_or_xmm( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1588. + // Rule at src/isa/x64/inst.isle line 1605. 
let expr0_0 = C::temp_writable_xmm(ctx); - let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); - let expr2_0 = MInst::XmmCmoveOr { - size: expr1_0, - cc1: pattern1_0.clone(), - cc2: pattern2_0.clone(), + let expr1_0 = C::temp_writable_xmm(ctx); + let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); + let expr3_0 = MInst::XmmCmove { + size: expr2_0, + cc: pattern1_0.clone(), consequent: pattern3_0.clone(), alternative: pattern4_0, + dst: expr1_0, + }; + let expr4_0 = C::writable_xmm_to_xmm(ctx, expr1_0); + let expr5_0 = MInst::XmmCmove { + size: expr2_0, + cc: pattern2_0.clone(), + consequent: pattern3_0.clone(), + alternative: expr4_0, dst: expr0_0, }; - let expr3_0 = constructor_writable_xmm_to_r_reg(ctx, expr0_0)?; - let expr4_0 = ConsumesFlags::ConsumesFlagsReturnsReg { - inst: expr2_0, - result: expr3_0, + let expr6_0 = constructor_writable_xmm_to_value_regs(ctx, expr0_0)?; + let expr7_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { + inst1: expr3_0, + inst2: expr5_0, + result: expr6_0, }; - return Some(expr4_0); + return Some(expr7_0); } // Generated as internal constructor for term cmove_or_from_values. @@ -1960,47 +2094,71 @@ pub fn constructor_cmove_or_from_values( let pattern3_0 = arg2; let pattern4_0 = arg3; let pattern5_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1599. + // Rule at src/isa/x64/inst.isle line 1620. 
let expr0_0 = C::put_in_regs(ctx, pattern4_0); let expr1_0 = C::put_in_regs(ctx, pattern5_0); let expr2_0 = C::temp_writable_gpr(ctx); let expr3_0 = C::temp_writable_gpr(ctx); - let expr4_0 = OperandSize::Size64; - let expr5_0: usize = 0; - let expr6_0 = constructor_value_regs_get_gpr(ctx, expr0_0, expr5_0)?; - let expr7_0 = C::gpr_to_gpr_mem(ctx, expr6_0); - let expr8_0: usize = 0; - let expr9_0 = constructor_value_regs_get_gpr(ctx, expr1_0, expr8_0)?; - let expr10_0 = MInst::CmoveOr { - size: expr4_0, - cc1: pattern2_0.clone(), - cc2: pattern3_0.clone(), - consequent: expr7_0, - alternative: expr9_0, + let expr4_0 = C::temp_writable_gpr(ctx); + let expr5_0 = C::temp_writable_gpr(ctx); + let expr6_0 = OperandSize::Size64; + let expr7_0: usize = 0; + let expr8_0 = constructor_value_regs_get_gpr(ctx, expr0_0, expr7_0)?; + let expr9_0 = C::gpr_to_gpr_mem(ctx, expr8_0); + let expr10_0: usize = 0; + let expr11_0 = constructor_value_regs_get_gpr(ctx, expr1_0, expr10_0)?; + let expr12_0 = MInst::Cmove { + size: expr6_0, + cc: pattern2_0.clone(), + consequent: expr9_0, + alternative: expr11_0, + dst: expr4_0, + }; + let expr13_0: usize = 0; + let expr14_0 = constructor_value_regs_get_gpr(ctx, expr0_0, expr13_0)?; + let expr15_0 = C::gpr_to_gpr_mem(ctx, expr14_0); + let expr16_0 = C::writable_gpr_to_gpr(ctx, expr4_0); + let expr17_0 = MInst::Cmove { + size: expr6_0, + cc: pattern2_0.clone(), + consequent: expr15_0, + alternative: expr16_0, dst: expr2_0, }; - let expr11_0: usize = 1; - let expr12_0 = constructor_value_regs_get_gpr(ctx, expr0_0, expr11_0)?; - let expr13_0 = C::gpr_to_gpr_mem(ctx, expr12_0); - let expr14_0: usize = 1; - let expr15_0 = constructor_value_regs_get_gpr(ctx, expr1_0, expr14_0)?; - let expr16_0 = MInst::CmoveOr { - size: expr4_0, - cc1: pattern2_0.clone(), - cc2: pattern3_0.clone(), - consequent: expr13_0, - alternative: expr15_0, + let expr18_0: usize = 1; + let expr19_0 = constructor_value_regs_get_gpr(ctx, expr0_0, expr18_0)?; + let expr20_0 = 
C::gpr_to_gpr_mem(ctx, expr19_0); + let expr21_0: usize = 1; + let expr22_0 = constructor_value_regs_get_gpr(ctx, expr1_0, expr21_0)?; + let expr23_0 = MInst::Cmove { + size: expr6_0, + cc: pattern2_0.clone(), + consequent: expr20_0, + alternative: expr22_0, + dst: expr5_0, + }; + let expr24_0: usize = 1; + let expr25_0 = constructor_value_regs_get_gpr(ctx, expr0_0, expr24_0)?; + let expr26_0 = C::gpr_to_gpr_mem(ctx, expr25_0); + let expr27_0 = C::writable_gpr_to_gpr(ctx, expr5_0); + let expr28_0 = MInst::Cmove { + size: expr6_0, + cc: pattern2_0.clone(), + consequent: expr26_0, + alternative: expr27_0, dst: expr3_0, }; - let expr17_0 = constructor_writable_gpr_to_r_reg(ctx, expr2_0)?; - let expr18_0 = constructor_writable_gpr_to_r_reg(ctx, expr3_0)?; - let expr19_0 = C::value_regs(ctx, expr17_0, expr18_0); - let expr20_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { - inst1: expr10_0, - inst2: expr16_0, - result: expr19_0, + let expr29_0 = constructor_writable_gpr_to_r_reg(ctx, expr2_0)?; + let expr30_0 = constructor_writable_gpr_to_r_reg(ctx, expr3_0)?; + let expr31_0 = C::value_regs(ctx, expr29_0, expr30_0); + let expr32_0 = ConsumesFlags::ConsumesFlagsFourTimesReturnsValueRegs { + inst1: expr12_0, + inst2: expr17_0, + inst3: expr23_0, + inst4: expr28_0, + result: expr31_0, }; - return Some(expr20_0); + return Some(expr32_0); } if let Some(pattern1_0) = C::is_xmm_type(ctx, pattern0_0) { if let Some(pattern2_0) = C::is_single_register_type(ctx, pattern1_0) { @@ -2008,7 +2166,7 @@ pub fn constructor_cmove_or_from_values( let pattern4_0 = arg2; let pattern5_0 = arg3; let pattern6_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1615. + // Rule at src/isa/x64/inst.isle line 1642. 
let expr0_0 = constructor_put_in_xmm_mem(ctx, pattern5_0)?; let expr1_0 = constructor_put_in_xmm(ctx, pattern6_0)?; let expr2_0 = constructor_cmove_or_xmm( @@ -2023,7 +2181,7 @@ pub fn constructor_cmove_or_from_values( let pattern4_0 = arg2; let pattern5_0 = arg3; let pattern6_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1612. + // Rule at src/isa/x64/inst.isle line 1639. let expr0_0 = constructor_put_in_gpr_mem(ctx, pattern5_0)?; let expr1_0 = constructor_put_in_gpr(ctx, pattern6_0)?; let expr2_0 = @@ -2034,6 +2192,23 @@ pub fn constructor_cmove_or_from_values( return None; } +// Generated as internal constructor for term setcc. +pub fn constructor_setcc(ctx: &mut C, arg0: &CC) -> Option { + let pattern0_0 = arg0; + // Rule at src/isa/x64/inst.isle line 1647. + let expr0_0 = C::temp_writable_gpr(ctx); + let expr1_0 = MInst::Setcc { + cc: pattern0_0.clone(), + dst: expr0_0, + }; + let expr2_0 = constructor_writable_gpr_to_r_reg(ctx, expr0_0)?; + let expr3_0 = ConsumesFlags::ConsumesFlagsReturnsReg { + inst: expr1_0, + result: expr2_0, + }; + return Some(expr3_0); +} + // Generated as internal constructor for term movzx. pub fn constructor_movzx( ctx: &mut C, @@ -2044,7 +2219,7 @@ pub fn constructor_movzx( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1620. + // Rule at src/isa/x64/inst.isle line 1655. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::MovzxRmR { ext_mode: pattern1_0.clone(), @@ -2066,7 +2241,7 @@ pub fn constructor_movsx( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1627. + // Rule at src/isa/x64/inst.isle line 1662. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::MovsxRmR { ext_mode: pattern1_0.clone(), @@ -2090,7 +2265,7 @@ pub fn constructor_xmm_rm_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1634. 
+ // Rule at src/isa/x64/inst.isle line 1669. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = MInst::XmmRmR { op: pattern1_0.clone(), @@ -2107,7 +2282,7 @@ pub fn constructor_xmm_rm_r( pub fn constructor_paddb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1641. + // Rule at src/isa/x64/inst.isle line 1676. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2118,7 +2293,7 @@ pub fn constructor_paddb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_paddw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1646. + // Rule at src/isa/x64/inst.isle line 1681. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2129,7 +2304,7 @@ pub fn constructor_paddw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_paddd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1651. + // Rule at src/isa/x64/inst.isle line 1686. let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Paddd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2140,7 +2315,7 @@ pub fn constructor_paddd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_paddq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1656. + // Rule at src/isa/x64/inst.isle line 1691. 
let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Paddq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2151,7 +2326,7 @@ pub fn constructor_paddq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_paddsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1661. + // Rule at src/isa/x64/inst.isle line 1696. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2162,7 +2337,7 @@ pub fn constructor_paddsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_paddsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1666. + // Rule at src/isa/x64/inst.isle line 1701. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2173,7 +2348,7 @@ pub fn constructor_paddsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_paddusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1671. + // Rule at src/isa/x64/inst.isle line 1706. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddusb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2184,7 +2359,7 @@ pub fn constructor_paddusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_paddusw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1676. + // Rule at src/isa/x64/inst.isle line 1711. 
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddusw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2195,7 +2370,7 @@ pub fn constructor_paddusw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_psubb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1681. + // Rule at src/isa/x64/inst.isle line 1716. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2206,7 +2381,7 @@ pub fn constructor_psubb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_psubw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1686. + // Rule at src/isa/x64/inst.isle line 1721. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2217,7 +2392,7 @@ pub fn constructor_psubw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_psubd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1691. + // Rule at src/isa/x64/inst.isle line 1726. let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Psubd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2228,7 +2403,7 @@ pub fn constructor_psubd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_psubq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1696. + // Rule at src/isa/x64/inst.isle line 1731. 
let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Psubq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2239,7 +2414,7 @@ pub fn constructor_psubq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_psubsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1701. + // Rule at src/isa/x64/inst.isle line 1736. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2250,7 +2425,7 @@ pub fn constructor_psubsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_psubsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1706. + // Rule at src/isa/x64/inst.isle line 1741. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2261,7 +2436,7 @@ pub fn constructor_psubsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_psubusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1711. + // Rule at src/isa/x64/inst.isle line 1746. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubusb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2272,7 +2447,7 @@ pub fn constructor_psubusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_psubusw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1716. + // Rule at src/isa/x64/inst.isle line 1751. 
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubusw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2283,7 +2458,7 @@ pub fn constructor_psubusw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pavgb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1721. + // Rule at src/isa/x64/inst.isle line 1756. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pavgb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2294,7 +2469,7 @@ pub fn constructor_pavgb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_pavgw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1726. + // Rule at src/isa/x64/inst.isle line 1761. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pavgw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2305,7 +2480,7 @@ pub fn constructor_pavgw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_pand(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1731. + // Rule at src/isa/x64/inst.isle line 1766. let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Pand; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2316,7 +2491,7 @@ pub fn constructor_pand(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Op pub fn constructor_andps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1736. + // Rule at src/isa/x64/inst.isle line 1771. 
let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Andps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2327,7 +2502,7 @@ pub fn constructor_andps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_andpd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1741. + // Rule at src/isa/x64/inst.isle line 1776. let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Andpd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2338,7 +2513,7 @@ pub fn constructor_andpd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_por(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1746. + // Rule at src/isa/x64/inst.isle line 1781. let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Por; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2349,7 +2524,7 @@ pub fn constructor_por(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Opt pub fn constructor_orps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1751. + // Rule at src/isa/x64/inst.isle line 1786. let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Orps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2360,7 +2535,7 @@ pub fn constructor_orps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Op pub fn constructor_orpd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1756. + // Rule at src/isa/x64/inst.isle line 1791. 
let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Orpd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2371,7 +2546,7 @@ pub fn constructor_orpd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Op pub fn constructor_pxor(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1761. + // Rule at src/isa/x64/inst.isle line 1796. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pxor; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2382,7 +2557,7 @@ pub fn constructor_pxor(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Op pub fn constructor_xorps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1766. + // Rule at src/isa/x64/inst.isle line 1801. let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Xorps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2393,7 +2568,7 @@ pub fn constructor_xorps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_xorpd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1771. + // Rule at src/isa/x64/inst.isle line 1806. let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Xorpd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2404,7 +2579,7 @@ pub fn constructor_xorpd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_pmullw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1776. + // Rule at src/isa/x64/inst.isle line 1811. 
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmullw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2415,7 +2590,7 @@ pub fn constructor_pmullw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmulld(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1781. + // Rule at src/isa/x64/inst.isle line 1816. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmulld; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2426,7 +2601,7 @@ pub fn constructor_pmulld(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmulhw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1786. + // Rule at src/isa/x64/inst.isle line 1821. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmulhw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2437,7 +2612,7 @@ pub fn constructor_pmulhw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmulhuw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1791. + // Rule at src/isa/x64/inst.isle line 1826. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmulhuw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2448,7 +2623,7 @@ pub fn constructor_pmulhuw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmuldq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1796. + // Rule at src/isa/x64/inst.isle line 1831. 
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pmuldq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2459,7 +2634,7 @@ pub fn constructor_pmuldq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmuludq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1801. + // Rule at src/isa/x64/inst.isle line 1836. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Pmuludq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2470,7 +2645,7 @@ pub fn constructor_pmuludq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_punpckhwd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1806. + // Rule at src/isa/x64/inst.isle line 1841. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Punpckhwd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2481,7 +2656,7 @@ pub fn constructor_punpckhwd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_punpcklwd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1811. + // Rule at src/isa/x64/inst.isle line 1846. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Punpcklwd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2492,7 +2667,7 @@ pub fn constructor_punpcklwd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_andnps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1816. + // Rule at src/isa/x64/inst.isle line 1851. 
let expr0_0: Type = F32X4; let expr1_0 = SseOpcode::Andnps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2503,7 +2678,7 @@ pub fn constructor_andnps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_andnpd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1821. + // Rule at src/isa/x64/inst.isle line 1856. let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Andnpd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2514,7 +2689,7 @@ pub fn constructor_andnpd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pandn(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1826. + // Rule at src/isa/x64/inst.isle line 1861. let expr0_0: Type = F64X2; let expr1_0 = SseOpcode::Pandn; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2525,17 +2700,17 @@ pub fn constructor_pandn(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_sse_blend_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 1830. + // Rule at src/isa/x64/inst.isle line 1865. let expr0_0 = SseOpcode::Blendvps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 1831. + // Rule at src/isa/x64/inst.isle line 1866. let expr0_0 = SseOpcode::Blendvpd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { - // Rule at src/isa/x64/inst.isle line 1832. + // Rule at src/isa/x64/inst.isle line 1867. 
let expr0_0 = SseOpcode::Pblendvb; return Some(expr0_0); } @@ -2546,17 +2721,17 @@ pub fn constructor_sse_blend_op(ctx: &mut C, arg0: Type) -> Option(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 1835. + // Rule at src/isa/x64/inst.isle line 1870. let expr0_0 = SseOpcode::Movaps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 1836. + // Rule at src/isa/x64/inst.isle line 1871. let expr0_0 = SseOpcode::Movapd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { - // Rule at src/isa/x64/inst.isle line 1837. + // Rule at src/isa/x64/inst.isle line 1872. let expr0_0 = SseOpcode::Movdqa; return Some(expr0_0); } @@ -2575,7 +2750,7 @@ pub fn constructor_sse_blend( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1841. + // Rule at src/isa/x64/inst.isle line 1876. let expr0_0 = C::xmm0(ctx); let expr1_0 = constructor_sse_mov_op(ctx, pattern0_0)?; let expr2_0 = MInst::XmmUnaryRmR { @@ -2599,7 +2774,7 @@ pub fn constructor_blendvpd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1855. + // Rule at src/isa/x64/inst.isle line 1890. let expr0_0 = C::xmm0(ctx); let expr1_0 = SseOpcode::Movapd; let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern2_0); @@ -2619,7 +2794,7 @@ pub fn constructor_blendvpd( pub fn constructor_movsd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1869. + // Rule at src/isa/x64/inst.isle line 1904. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Movsd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2630,7 +2805,7 @@ pub fn constructor_movsd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> O pub fn constructor_movlhps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1874. + // Rule at src/isa/x64/inst.isle line 1909. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Movlhps; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2641,7 +2816,7 @@ pub fn constructor_movlhps(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmaxsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1879. + // Rule at src/isa/x64/inst.isle line 1914. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2652,7 +2827,7 @@ pub fn constructor_pmaxsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmaxsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1884. + // Rule at src/isa/x64/inst.isle line 1919. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2663,7 +2838,7 @@ pub fn constructor_pmaxsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmaxsd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1889. + // Rule at src/isa/x64/inst.isle line 1924. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxsd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2674,7 +2849,7 @@ pub fn constructor_pmaxsd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pminsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1894. + // Rule at src/isa/x64/inst.isle line 1929. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2685,7 +2860,7 @@ pub fn constructor_pminsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pminsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1899. + // Rule at src/isa/x64/inst.isle line 1934. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2696,7 +2871,7 @@ pub fn constructor_pminsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pminsd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1904. + // Rule at src/isa/x64/inst.isle line 1939. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminsd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2707,7 +2882,7 @@ pub fn constructor_pminsd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmaxub(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1909. + // Rule at src/isa/x64/inst.isle line 1944. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxub; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2718,7 +2893,7 @@ pub fn constructor_pmaxub(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmaxuw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1914. + // Rule at src/isa/x64/inst.isle line 1949. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxuw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2729,7 +2904,7 @@ pub fn constructor_pmaxuw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pmaxud(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1919. + // Rule at src/isa/x64/inst.isle line 1954. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pmaxud; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2740,7 +2915,7 @@ pub fn constructor_pmaxud(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pminub(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1924. + // Rule at src/isa/x64/inst.isle line 1959. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminub; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2751,7 +2926,7 @@ pub fn constructor_pminub(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pminuw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1929. + // Rule at src/isa/x64/inst.isle line 1964. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminuw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2762,7 +2937,7 @@ pub fn constructor_pminuw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_pminud(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1934. + // Rule at src/isa/x64/inst.isle line 1969. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Pminud; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2773,7 +2948,7 @@ pub fn constructor_pminud(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> pub fn constructor_punpcklbw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1939. + // Rule at src/isa/x64/inst.isle line 1974. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Punpcklbw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2784,7 +2959,7 @@ pub fn constructor_punpcklbw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_punpckhbw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1944. + // Rule at src/isa/x64/inst.isle line 1979. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Punpckhbw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2795,7 +2970,7 @@ pub fn constructor_punpckhbw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_packsswb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1949. + // Rule at src/isa/x64/inst.isle line 1984. 
let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Packsswb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2816,7 +2991,7 @@ pub fn constructor_xmm_rm_r_imm( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1954. + // Rule at src/isa/x64/inst.isle line 1989. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_reg(ctx, expr0_0); let expr2_0 = MInst::XmmRmRImm { @@ -2844,7 +3019,7 @@ pub fn constructor_palignr( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1966. + // Rule at src/isa/x64/inst.isle line 2001. let expr0_0 = SseOpcode::Palignr; let expr1_0 = C::xmm_to_reg(ctx, pattern0_0); let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0); @@ -2863,7 +3038,7 @@ pub fn constructor_cmpps( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1975. + // Rule at src/isa/x64/inst.isle line 2010. let expr0_0 = SseOpcode::Cmpps; let expr1_0 = C::xmm_to_reg(ctx, pattern0_0); let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0); @@ -2883,7 +3058,7 @@ pub fn constructor_pinsrb( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1984. + // Rule at src/isa/x64/inst.isle line 2019. let expr0_0 = SseOpcode::Pinsrb; let expr1_0 = C::xmm_to_reg(ctx, pattern0_0); let expr2_0 = C::gpr_mem_to_reg_mem(ctx, pattern1_0); @@ -2902,7 +3077,7 @@ pub fn constructor_pinsrw( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1993. + // Rule at src/isa/x64/inst.isle line 2028. 
let expr0_0 = SseOpcode::Pinsrw; let expr1_0 = C::xmm_to_reg(ctx, pattern0_0); let expr2_0 = C::gpr_mem_to_reg_mem(ctx, pattern1_0); @@ -2923,7 +3098,7 @@ pub fn constructor_pinsrd( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 2002. + // Rule at src/isa/x64/inst.isle line 2037. let expr0_0 = SseOpcode::Pinsrd; let expr1_0 = C::xmm_to_reg(ctx, pattern0_0); let expr2_0 = C::gpr_mem_to_reg_mem(ctx, pattern1_0); @@ -2942,7 +3117,7 @@ pub fn constructor_insertps( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 2011. + // Rule at src/isa/x64/inst.isle line 2046. let expr0_0 = SseOpcode::Insertps; let expr1_0 = C::xmm_to_reg(ctx, pattern0_0); let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0); @@ -2961,7 +3136,7 @@ pub fn constructor_pshufd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 2020. + // Rule at src/isa/x64/inst.isle line 2055. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = SseOpcode::Pshufd; let expr2_0 = constructor_writable_xmm_to_r_reg(ctx, expr0_0)?; @@ -2980,6 +3155,24 @@ pub fn constructor_pshufd( return Some(expr7_0); } +// Generated as internal constructor for term pshufb. +pub fn constructor_pshufb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 2067. + let expr0_0 = C::temp_writable_xmm(ctx); + let expr1_0 = SseOpcode::Pshufb; + let expr2_0 = MInst::XmmRmR { + op: expr1_0, + src1: pattern0_0, + src2: pattern1_0.clone(), + dst: expr0_0, + }; + let expr3_0 = C::emit(ctx, &expr2_0); + let expr4_0 = C::writable_xmm_to_xmm(ctx, expr0_0); + return Some(expr4_0); +} + // Generated as internal constructor for term xmm_unary_rm_r. 
pub fn constructor_xmm_unary_rm_r( ctx: &mut C, @@ -2988,7 +3181,7 @@ pub fn constructor_xmm_unary_rm_r( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2032. + // Rule at src/isa/x64/inst.isle line 2077. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = MInst::XmmUnaryRmR { op: pattern0_0.clone(), @@ -3003,7 +3196,7 @@ pub fn constructor_xmm_unary_rm_r( // Generated as internal constructor for term pmovsxbw. pub fn constructor_pmovsxbw(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2039. + // Rule at src/isa/x64/inst.isle line 2084. let expr0_0 = SseOpcode::Pmovsxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -3012,7 +3205,7 @@ pub fn constructor_pmovsxbw(ctx: &mut C, arg0: &XmmMem) -> Option(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2044. + // Rule at src/isa/x64/inst.isle line 2089. let expr0_0 = SseOpcode::Pmovzxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -3021,7 +3214,7 @@ pub fn constructor_pmovzxbw(ctx: &mut C, arg0: &XmmMem) -> Option(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2049. + // Rule at src/isa/x64/inst.isle line 2094. let expr0_0 = SseOpcode::Pabsb; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -3030,7 +3223,7 @@ pub fn constructor_pabsb(ctx: &mut C, arg0: &XmmMem) -> Option // Generated as internal constructor for term pabsw. pub fn constructor_pabsw(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2054. + // Rule at src/isa/x64/inst.isle line 2099. 
let expr0_0 = SseOpcode::Pabsw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -3039,7 +3232,7 @@ pub fn constructor_pabsw(ctx: &mut C, arg0: &XmmMem) -> Option // Generated as internal constructor for term pabsd. pub fn constructor_pabsd(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2059. + // Rule at src/isa/x64/inst.isle line 2104. let expr0_0 = SseOpcode::Pabsd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -3053,7 +3246,7 @@ pub fn constructor_xmm_unary_rm_r_evex( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2064. + // Rule at src/isa/x64/inst.isle line 2109. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = MInst::XmmUnaryRmREvex { op: pattern0_0.clone(), @@ -3068,12 +3261,21 @@ pub fn constructor_xmm_unary_rm_r_evex( // Generated as internal constructor for term vpabsq. pub fn constructor_vpabsq(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2071. + // Rule at src/isa/x64/inst.isle line 2116. let expr0_0 = Avx512Opcode::Vpabsq; let expr1_0 = constructor_xmm_unary_rm_r_evex(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); } +// Generated as internal constructor for term vpopcntb. +pub fn constructor_vpopcntb(ctx: &mut C, arg0: &XmmMem) -> Option { + let pattern0_0 = arg0; + // Rule at src/isa/x64/inst.isle line 2121. + let expr0_0 = Avx512Opcode::Vpopcntb; + let expr1_0 = constructor_xmm_unary_rm_r_evex(ctx, &expr0_0, pattern0_0)?; + return Some(expr1_0); +} + // Generated as internal constructor for term xmm_rm_r_evex. pub fn constructor_xmm_rm_r_evex( ctx: &mut C, @@ -3084,7 +3286,7 @@ pub fn constructor_xmm_rm_r_evex( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 2076. + // Rule at src/isa/x64/inst.isle line 2126. 
let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = MInst::XmmRmREvex { op: pattern0_0.clone(), @@ -3101,7 +3303,7 @@ pub fn constructor_xmm_rm_r_evex( pub fn constructor_vpmullq(ctx: &mut C, arg0: &XmmMem, arg1: Xmm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2088. + // Rule at src/isa/x64/inst.isle line 2138. let expr0_0 = Avx512Opcode::Vpmullq; let expr1_0 = constructor_xmm_rm_r_evex(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3119,7 +3321,7 @@ pub fn constructor_mul_hi( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 2097. + // Rule at src/isa/x64/inst.isle line 2147. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::temp_writable_gpr(ctx); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); @@ -3148,7 +3350,7 @@ pub fn constructor_mulhi_u( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 2112. + // Rule at src/isa/x64/inst.isle line 2162. let expr0_0: bool = false; let expr1_0 = constructor_mul_hi(ctx, pattern0_0, expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3164,7 +3366,7 @@ pub fn constructor_xmm_rmi_xmm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 2117. + // Rule at src/isa/x64/inst.isle line 2167. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = MInst::XmmRmiReg { opcode: pattern0_0.clone(), @@ -3181,7 +3383,7 @@ pub fn constructor_xmm_rmi_xmm( pub fn constructor_psllw(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2127. + // Rule at src/isa/x64/inst.isle line 2177. 
let expr0_0 = SseOpcode::Psllw; let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3191,7 +3393,7 @@ pub fn constructor_psllw(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) - pub fn constructor_pslld(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2132. + // Rule at src/isa/x64/inst.isle line 2182. let expr0_0 = SseOpcode::Pslld; let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3201,7 +3403,7 @@ pub fn constructor_pslld(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) - pub fn constructor_psllq(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2137. + // Rule at src/isa/x64/inst.isle line 2187. let expr0_0 = SseOpcode::Psllq; let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3211,7 +3413,7 @@ pub fn constructor_psllq(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) - pub fn constructor_psrlw(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2142. + // Rule at src/isa/x64/inst.isle line 2192. let expr0_0 = SseOpcode::Psrlw; let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3221,7 +3423,7 @@ pub fn constructor_psrlw(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) - pub fn constructor_psrld(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2147. + // Rule at src/isa/x64/inst.isle line 2197. 
let expr0_0 = SseOpcode::Psrld; let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3231,7 +3433,7 @@ pub fn constructor_psrld(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) - pub fn constructor_psrlq(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2152. + // Rule at src/isa/x64/inst.isle line 2202. let expr0_0 = SseOpcode::Psrlq; let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3241,7 +3443,7 @@ pub fn constructor_psrlq(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) - pub fn constructor_psraw(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2157. + // Rule at src/isa/x64/inst.isle line 2207. let expr0_0 = SseOpcode::Psraw; let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3251,7 +3453,7 @@ pub fn constructor_psraw(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) - pub fn constructor_psrad(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2162. + // Rule at src/isa/x64/inst.isle line 2212. let expr0_0 = SseOpcode::Psrad; let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3262,7 +3464,7 @@ pub fn constructor_pextrd(ctx: &mut C, arg0: Type, arg1: Xmm, arg2: let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 2167. + // Rule at src/isa/x64/inst.isle line 2217. 
let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = SseOpcode::Pextrd; let expr2_0 = constructor_writable_gpr_to_r_reg(ctx, expr0_0)?; @@ -3295,7 +3497,7 @@ pub fn constructor_cmppd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 2183. + // Rule at src/isa/x64/inst.isle line 2233. let expr0_0 = SseOpcode::Cmppd; let expr1_0 = C::xmm_to_reg(ctx, pattern0_0); let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0); @@ -3315,7 +3517,7 @@ pub fn constructor_gpr_to_xmm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 2192. + // Rule at src/isa/x64/inst.isle line 2242. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = MInst::GprToXmm { op: pattern0_0.clone(), @@ -3332,7 +3534,7 @@ pub fn constructor_gpr_to_xmm( pub fn constructor_not(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2199. + // Rule at src/isa/x64/inst.isle line 2249. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Not { @@ -3349,7 +3551,7 @@ pub fn constructor_not(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option pub fn constructor_neg(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2207. + // Rule at src/isa/x64/inst.isle line 2257. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Neg { @@ -3365,7 +3567,7 @@ pub fn constructor_neg(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option // Generated as internal constructor for term lea. pub fn constructor_lea(ctx: &mut C, arg0: &SyntheticAmode) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2214. + // Rule at src/isa/x64/inst.isle line 2264. 
let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::LoadEffectiveAddress { addr: pattern0_0.clone(), @@ -3379,7 +3581,7 @@ pub fn constructor_lea(ctx: &mut C, arg0: &SyntheticAmode) -> Option // Generated as internal constructor for term ud2. pub fn constructor_ud2(ctx: &mut C, arg0: &TrapCode) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2221. + // Rule at src/isa/x64/inst.isle line 2271. let expr0_0 = MInst::Ud2 { trap_code: pattern0_0.clone(), }; @@ -3387,10 +3589,162 @@ pub fn constructor_ud2(ctx: &mut C, arg0: &TrapCode) -> Option(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 2276. + let expr0_0 = C::temp_writable_gpr(ctx); + let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); + let expr2_0 = UnaryRmROpcode::Lzcnt; + let expr3_0 = C::gpr_to_gpr_mem(ctx, pattern1_0); + let expr4_0 = MInst::UnaryRmR { + size: expr1_0, + op: expr2_0, + src: expr3_0, + dst: expr0_0, + }; + let expr5_0 = C::emit(ctx, &expr4_0); + let expr6_0 = C::writable_gpr_to_gpr(ctx, expr0_0); + return Some(expr6_0); +} + +// Generated as internal constructor for term tzcnt. +pub fn constructor_tzcnt(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 2284. + let expr0_0 = C::temp_writable_gpr(ctx); + let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); + let expr2_0 = UnaryRmROpcode::Tzcnt; + let expr3_0 = C::gpr_to_gpr_mem(ctx, pattern1_0); + let expr4_0 = MInst::UnaryRmR { + size: expr1_0, + op: expr2_0, + src: expr3_0, + dst: expr0_0, + }; + let expr5_0 = C::emit(ctx, &expr4_0); + let expr6_0 = C::writable_gpr_to_gpr(ctx, expr0_0); + return Some(expr6_0); +} + +// Generated as internal constructor for term bsr. 
+pub fn constructor_bsr(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 2292. + let expr0_0 = C::temp_writable_gpr(ctx); + let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); + let expr2_0 = UnaryRmROpcode::Bsr; + let expr3_0 = C::gpr_to_gpr_mem(ctx, pattern1_0); + let expr4_0 = MInst::UnaryRmR { + size: expr1_0, + op: expr2_0, + src: expr3_0, + dst: expr0_0, + }; + let expr5_0 = constructor_writable_gpr_to_r_reg(ctx, expr0_0)?; + let expr6_0 = ProducesFlags::ProducesFlagsReturnsReg { + inst: expr4_0, + result: expr5_0, + }; + return Some(expr6_0); +} + +// Generated as internal constructor for term bsr_or_else. +pub fn constructor_bsr_or_else( + ctx: &mut C, + arg0: Type, + arg1: Gpr, + arg2: Gpr, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/x64/inst.isle line 2301. + let expr0_0 = constructor_bsr(ctx, pattern0_0, pattern1_0)?; + let expr1_0 = constructor_produces_flags_get_reg(ctx, &expr0_0)?; + let expr2_0 = C::gpr_new(ctx, expr1_0); + let expr3_0 = CC::Z; + let expr4_0 = C::gpr_to_gpr_mem(ctx, pattern2_0); + let expr5_0 = constructor_cmove(ctx, pattern0_0, &expr3_0, &expr4_0, expr2_0)?; + let expr6_0 = constructor_produces_flags_ignore(ctx, &expr0_0)?; + let expr7_0 = constructor_with_flags_reg(ctx, &expr6_0, &expr5_0)?; + let expr8_0 = C::gpr_new(ctx, expr7_0); + return Some(expr8_0); +} + +// Generated as internal constructor for term bsf. +pub fn constructor_bsf(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 2312. 
+ let expr0_0 = C::temp_writable_gpr(ctx); + let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); + let expr2_0 = UnaryRmROpcode::Bsf; + let expr3_0 = C::gpr_to_gpr_mem(ctx, pattern1_0); + let expr4_0 = MInst::UnaryRmR { + size: expr1_0, + op: expr2_0, + src: expr3_0, + dst: expr0_0, + }; + let expr5_0 = constructor_writable_gpr_to_r_reg(ctx, expr0_0)?; + let expr6_0 = ProducesFlags::ProducesFlagsReturnsReg { + inst: expr4_0, + result: expr5_0, + }; + return Some(expr6_0); +} + +// Generated as internal constructor for term bsf_or_else. +pub fn constructor_bsf_or_else( + ctx: &mut C, + arg0: Type, + arg1: Gpr, + arg2: Gpr, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/x64/inst.isle line 2321. + let expr0_0 = constructor_bsf(ctx, pattern0_0, pattern1_0)?; + let expr1_0 = constructor_produces_flags_get_reg(ctx, &expr0_0)?; + let expr2_0 = C::gpr_new(ctx, expr1_0); + let expr3_0 = CC::Z; + let expr4_0 = C::gpr_to_gpr_mem(ctx, pattern2_0); + let expr5_0 = constructor_cmove(ctx, pattern0_0, &expr3_0, &expr4_0, expr2_0)?; + let expr6_0 = constructor_produces_flags_ignore(ctx, &expr0_0)?; + let expr7_0 = constructor_with_flags_reg(ctx, &expr6_0, &expr5_0)?; + let expr8_0 = C::gpr_new(ctx, expr7_0); + return Some(expr8_0); +} + +// Generated as internal constructor for term x64_popcnt. +pub fn constructor_x64_popcnt(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 2332. 
+ let expr0_0 = C::temp_writable_gpr(ctx); + let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); + let expr2_0 = UnaryRmROpcode::Popcnt; + let expr3_0 = C::gpr_to_gpr_mem(ctx, pattern1_0); + let expr4_0 = MInst::UnaryRmR { + size: expr1_0, + op: expr2_0, + src: expr3_0, + dst: expr0_0, + }; + let expr5_0 = C::emit(ctx, &expr4_0); + let expr6_0 = C::writable_gpr_to_gpr(ctx, expr0_0); + return Some(expr6_0); +} + // Generated as internal constructor for term reg_to_xmm_mem. pub fn constructor_reg_to_xmm_mem(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2270. + // Rule at src/isa/x64/inst.isle line 2388. let expr0_0 = C::xmm_new(ctx, pattern0_0); let expr1_0 = C::xmm_to_xmm_mem(ctx, expr0_0); return Some(expr1_0); @@ -3399,7 +3753,7 @@ pub fn constructor_reg_to_xmm_mem(ctx: &mut C, arg0: Reg) -> Option< // Generated as internal constructor for term xmm_to_reg_mem. pub fn constructor_xmm_to_reg_mem(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2273. + // Rule at src/isa/x64/inst.isle line 2391. let expr0_0 = C::xmm_new(ctx, pattern0_0); let expr1_0 = C::xmm_to_reg(ctx, expr0_0); let expr2_0 = RegMem::Reg { reg: expr1_0 }; @@ -3413,19 +3767,43 @@ pub fn constructor_writable_gpr_to_r_reg( arg0: WritableGpr, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2277. + // Rule at src/isa/x64/inst.isle line 2395. let expr0_0 = C::writable_gpr_to_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); return Some(expr1_0); } +// Generated as internal constructor for term writable_gpr_to_gpr_mem. +pub fn constructor_writable_gpr_to_gpr_mem( + ctx: &mut C, + arg0: WritableGpr, +) -> Option { + let pattern0_0 = arg0; + // Rule at src/isa/x64/inst.isle line 2398. 
+ let expr0_0 = C::writable_gpr_to_gpr(ctx, pattern0_0); + let expr1_0 = C::gpr_to_gpr_mem(ctx, expr0_0); + return Some(expr1_0); +} + +// Generated as internal constructor for term writable_gpr_to_value_regs. +pub fn constructor_writable_gpr_to_value_regs( + ctx: &mut C, + arg0: WritableGpr, +) -> Option { + let pattern0_0 = arg0; + // Rule at src/isa/x64/inst.isle line 2401. + let expr0_0 = constructor_writable_gpr_to_r_reg(ctx, pattern0_0)?; + let expr1_0 = C::value_reg(ctx, expr0_0); + return Some(expr1_0); +} + // Generated as internal constructor for term writable_xmm_to_r_reg. pub fn constructor_writable_xmm_to_r_reg( ctx: &mut C, arg0: WritableXmm, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2280. + // Rule at src/isa/x64/inst.isle line 2404. let expr0_0 = C::writable_xmm_to_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); return Some(expr1_0); @@ -3437,19 +3815,31 @@ pub fn constructor_writable_xmm_to_xmm_mem( arg0: WritableXmm, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2283. + // Rule at src/isa/x64/inst.isle line 2407. let expr0_0 = C::writable_xmm_to_xmm(ctx, pattern0_0); let expr1_0 = C::xmm_to_xmm_mem(ctx, expr0_0); return Some(expr1_0); } +// Generated as internal constructor for term writable_xmm_to_value_regs. +pub fn constructor_writable_xmm_to_value_regs( + ctx: &mut C, + arg0: WritableXmm, +) -> Option { + let pattern0_0 = arg0; + // Rule at src/isa/x64/inst.isle line 2410. + let expr0_0 = constructor_writable_xmm_to_r_reg(ctx, pattern0_0)?; + let expr1_0 = C::value_reg(ctx, expr0_0); + return Some(expr1_0); +} + // Generated as internal constructor for term synthetic_amode_to_gpr_mem. pub fn constructor_synthetic_amode_to_gpr_mem( ctx: &mut C, arg0: &SyntheticAmode, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2287. + // Rule at src/isa/x64/inst.isle line 2414. 
let expr0_0 = C::synthetic_amode_to_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_gpr_mem(ctx, &expr0_0); return Some(expr1_0); @@ -3461,7 +3851,7 @@ pub fn constructor_synthetic_amode_to_xmm_mem( arg0: &SyntheticAmode, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2290. + // Rule at src/isa/x64/inst.isle line 2417. let expr0_0 = C::synthetic_amode_to_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_xmm_mem(ctx, &expr0_0); return Some(expr1_0); @@ -3469,6 +3859,136 @@ pub fn constructor_synthetic_amode_to_xmm_mem( // Generated as internal constructor for term lower. pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + let pattern0_0 = arg0; + if let Some(pattern1_0) = C::first_result(ctx, pattern0_0) { + let pattern2_0 = C::value_type(ctx, pattern1_0); + if pattern2_0 == I128 { + if let Some(()) = C::use_popcnt(ctx, pattern2_0) { + let pattern5_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern6_0, + arg: pattern6_1, + } = &pattern5_0 + { + if let &Opcode::Popcnt = pattern6_0 { + // Rule at src/isa/x64/lower.isle line 1628. 
+ let expr0_0: Type = I64; + let expr1_0 = C::put_in_regs(ctx, pattern6_1); + let expr2_0: usize = 0; + let expr3_0 = constructor_value_regs_get_gpr(ctx, expr1_0, expr2_0)?; + let expr4_0 = constructor_x64_popcnt(ctx, expr0_0, expr3_0)?; + let expr5_0: Type = I64; + let expr6_0 = C::put_in_regs(ctx, pattern6_1); + let expr7_0: usize = 1; + let expr8_0 = constructor_value_regs_get_gpr(ctx, expr6_0, expr7_0)?; + let expr9_0 = constructor_x64_popcnt(ctx, expr5_0, expr8_0)?; + let expr10_0: Type = I64; + let expr11_0 = C::gpr_to_gpr_mem_imm(ctx, expr9_0); + let expr12_0 = constructor_add(ctx, expr10_0, expr4_0, &expr11_0)?; + let expr13_0 = C::gpr_to_reg(ctx, expr12_0); + let expr14_0: Type = I64; + let expr15_0: u64 = 0; + let expr16_0 = constructor_imm(ctx, expr14_0, expr15_0)?; + let expr17_0 = C::value_regs(ctx, expr13_0, expr16_0); + let expr18_0 = C::output(ctx, expr17_0); + return Some(expr18_0); + } + } + } + } + if pattern2_0 == I8X16 { + if let Some(()) = C::avx512vl_enabled(ctx, pattern2_0) { + if let Some(()) = C::avx512bitalg_enabled(ctx, pattern2_0) { + let pattern6_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern7_0, + arg: pattern7_1, + } = &pattern6_0 + { + if let &Opcode::Popcnt = pattern7_0 { + // Rule at src/isa/x64/lower.isle line 1720. + let expr0_0 = constructor_put_in_xmm_mem(ctx, pattern7_1)?; + let expr1_0 = constructor_vpopcntb(ctx, &expr0_0)?; + let expr2_0 = constructor_output_xmm(ctx, expr1_0)?; + return Some(expr2_0); + } + } + } + } + } + if let Some(pattern3_0) = C::ty_32_or_64(ctx, pattern2_0) { + if let Some(()) = C::use_lzcnt(ctx, pattern2_0) { + let pattern5_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern6_0, + arg: pattern6_1, + } = &pattern5_0 + { + if let &Opcode::Clz = pattern6_0 { + // Rule at src/isa/x64/lower.isle line 1536. 
+ let expr0_0 = constructor_put_in_gpr(ctx, pattern6_1)?; + let expr1_0 = constructor_lzcnt(ctx, pattern3_0, expr0_0)?; + let expr2_0 = constructor_output_gpr(ctx, expr1_0)?; + return Some(expr2_0); + } + } + } + if let Some(()) = C::use_bmi1(ctx, pattern2_0) { + let pattern5_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern6_0, + arg: pattern6_1, + } = &pattern5_0 + { + if let &Opcode::Ctz = pattern6_0 { + // Rule at src/isa/x64/lower.isle line 1578. + let expr0_0 = constructor_put_in_gpr(ctx, pattern6_1)?; + let expr1_0 = constructor_tzcnt(ctx, pattern3_0, expr0_0)?; + let expr2_0 = constructor_output_gpr(ctx, expr1_0)?; + return Some(expr2_0); + } + } + } + if let Some(()) = C::use_popcnt(ctx, pattern2_0) { + let pattern5_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern6_0, + arg: pattern6_1, + } = &pattern5_0 + { + if let &Opcode::Popcnt = pattern6_0 { + // Rule at src/isa/x64/lower.isle line 1614. + let expr0_0 = constructor_put_in_gpr(ctx, pattern6_1)?; + let expr1_0 = constructor_x64_popcnt(ctx, pattern3_0, expr0_0)?; + let expr2_0 = constructor_output_gpr(ctx, expr1_0)?; + return Some(expr2_0); + } + } + } + } + if let Some(pattern3_0) = C::ty_8_or_16(ctx, pattern2_0) { + if let Some(()) = C::use_popcnt(ctx, pattern2_0) { + let pattern5_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern6_0, + arg: pattern6_1, + } = &pattern5_0 + { + if let &Opcode::Popcnt = pattern6_0 { + // Rule at src/isa/x64/lower.isle line 1621. 
+ let expr0_0: Type = I32; + let expr1_0: Type = I32; + let expr2_0 = ExtendKind::Zero; + let expr3_0 = + constructor_extend_to_gpr(ctx, pattern6_1, expr1_0, &expr2_0)?; + let expr4_0 = constructor_x64_popcnt(ctx, expr0_0, expr3_0)?; + let expr5_0 = constructor_output_gpr(ctx, expr4_0)?; + return Some(expr5_0); + } + } + } + } + } let pattern0_0 = arg0; let pattern1_0 = C::inst_data(ctx, pattern0_0); match &pattern1_0 { @@ -3536,6 +4056,44 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + match pattern2_0 { + &Opcode::IsNull => { + let pattern4_0 = C::value_type(ctx, pattern2_1); + if pattern4_0 == R64 { + // Rule at src/isa/x64/lower.isle line 1849. + let expr0_0 = OperandSize::Size64; + let expr1_0: u32 = 0; + let expr2_0 = constructor_put_in_gpr(ctx, pattern2_1)?; + let expr3_0 = constructor_cmp_imm(ctx, &expr0_0, expr1_0, expr2_0)?; + let expr4_0 = CC::Z; + let expr5_0 = constructor_setcc(ctx, &expr4_0)?; + let expr6_0 = constructor_with_flags(ctx, &expr3_0, &expr5_0)?; + let expr7_0 = C::output(ctx, expr6_0); + return Some(expr7_0); + } + } + &Opcode::IsInvalid => { + let pattern4_0 = C::value_type(ctx, pattern2_1); + if pattern4_0 == R64 { + // Rule at src/isa/x64/lower.isle line 1857. 
+ let expr0_0 = OperandSize::Size64; + let expr1_0: u32 = 4294967295; + let expr2_0 = constructor_put_in_gpr(ctx, pattern2_1)?; + let expr3_0 = constructor_cmp_imm(ctx, &expr0_0, expr1_0, expr2_0)?; + let expr4_0 = CC::Z; + let expr5_0 = constructor_setcc(ctx, &expr4_0)?; + let expr6_0 = constructor_with_flags(ctx, &expr3_0, &expr5_0)?; + let expr7_0 = C::output(ctx, expr6_0); + return Some(expr7_0); + } + } + _ => {} + } + } _ => {} } if let Some(pattern1_0) = C::first_result(ctx, pattern0_0) { @@ -3644,6 +4202,74 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option {} } } + if pattern2_0 == I8 { + let pattern4_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern5_0, + arg: pattern5_1, + } = &pattern4_0 + { + if let &Opcode::Bitrev = pattern5_0 { + // Rule at src/isa/x64/lower.isle line 1771. + let expr0_0: Type = I32; + let expr1_0 = constructor_put_in_gpr(ctx, pattern5_1)?; + let expr2_0 = constructor_do_bitrev8(ctx, expr0_0, expr1_0)?; + let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; + return Some(expr3_0); + } + } + } + if pattern2_0 == I16 { + let pattern4_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern5_0, + arg: pattern5_1, + } = &pattern4_0 + { + if let &Opcode::Bitrev = pattern5_0 { + // Rule at src/isa/x64/lower.isle line 1774. + let expr0_0: Type = I32; + let expr1_0 = constructor_put_in_gpr(ctx, pattern5_1)?; + let expr2_0 = constructor_do_bitrev16(ctx, expr0_0, expr1_0)?; + let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; + return Some(expr3_0); + } + } + } + if pattern2_0 == I32 { + let pattern4_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern5_0, + arg: pattern5_1, + } = &pattern4_0 + { + if let &Opcode::Bitrev = pattern5_0 { + // Rule at src/isa/x64/lower.isle line 1777. 
+ let expr0_0: Type = I32; + let expr1_0 = constructor_put_in_gpr(ctx, pattern5_1)?; + let expr2_0 = constructor_do_bitrev32(ctx, expr0_0, expr1_0)?; + let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; + return Some(expr3_0); + } + } + } + if pattern2_0 == I64 { + let pattern4_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Unary { + opcode: ref pattern5_0, + arg: pattern5_1, + } = &pattern4_0 + { + if let &Opcode::Bitrev = pattern5_0 { + // Rule at src/isa/x64/lower.isle line 1780. + let expr0_0: Type = I64; + let expr1_0 = constructor_put_in_gpr(ctx, pattern5_1)?; + let expr2_0 = constructor_do_bitrev64(ctx, expr0_0, expr1_0)?; + let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; + return Some(expr3_0); + } + } + } if pattern2_0 == I128 { let pattern4_0 = C::inst_data(ctx, pattern0_0); match &pattern4_0 { @@ -3882,11 +4508,129 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { - if let &Opcode::Bnot = pattern5_0 { - // Rule at src/isa/x64/lower.isle line 1266. - let expr0_0 = constructor_i128_not(ctx, pattern5_1)?; - let expr1_0 = C::output(ctx, expr0_0); - return Some(expr1_0); + match pattern5_0 { + &Opcode::Bnot => { + // Rule at src/isa/x64/lower.isle line 1266. + let expr0_0 = constructor_i128_not(ctx, pattern5_1)?; + let expr1_0 = C::output(ctx, expr0_0); + return Some(expr1_0); + } + &Opcode::Bitrev => { + // Rule at src/isa/x64/lower.isle line 1783. 
+ let expr0_0: Type = I64; + let expr1_0 = C::put_in_regs(ctx, pattern5_1); + let expr2_0: usize = 1; + let expr3_0 = constructor_value_regs_get_gpr(ctx, expr1_0, expr2_0)?; + let expr4_0 = constructor_do_bitrev64(ctx, expr0_0, expr3_0)?; + let expr5_0 = C::gpr_to_reg(ctx, expr4_0); + let expr6_0: Type = I64; + let expr7_0 = C::put_in_regs(ctx, pattern5_1); + let expr8_0: usize = 0; + let expr9_0 = constructor_value_regs_get_gpr(ctx, expr7_0, expr8_0)?; + let expr10_0 = constructor_do_bitrev64(ctx, expr6_0, expr9_0)?; + let expr11_0 = C::gpr_to_reg(ctx, expr10_0); + let expr12_0 = C::value_regs(ctx, expr5_0, expr11_0); + let expr13_0 = C::output(ctx, expr12_0); + return Some(expr13_0); + } + &Opcode::Clz => { + // Rule at src/isa/x64/lower.isle line 1553. + let expr0_0: Type = I64; + let expr1_0: Type = I64; + let expr2_0 = C::put_in_regs(ctx, pattern5_1); + let expr3_0: usize = 1; + let expr4_0 = constructor_value_regs_get_gpr(ctx, expr2_0, expr3_0)?; + let expr5_0 = constructor_do_clz(ctx, expr0_0, expr1_0, expr4_0)?; + let expr6_0: Type = I64; + let expr7_0: Type = I64; + let expr8_0: Type = I64; + let expr9_0 = C::put_in_regs(ctx, pattern5_1); + let expr10_0: usize = 0; + let expr11_0 = constructor_value_regs_get_gpr(ctx, expr9_0, expr10_0)?; + let expr12_0 = constructor_do_clz(ctx, expr7_0, expr8_0, expr11_0)?; + let expr13_0: u32 = 64; + let expr14_0 = RegMemImm::Imm { simm32: expr13_0 }; + let expr15_0 = C::gpr_mem_imm_new(ctx, &expr14_0); + let expr16_0 = constructor_add(ctx, expr6_0, expr12_0, &expr15_0)?; + let expr17_0 = OperandSize::Size64; + let expr18_0: u32 = 64; + let expr19_0 = constructor_cmp_imm(ctx, &expr17_0, expr18_0, expr5_0)?; + let expr20_0: Type = I64; + let expr21_0 = CC::NZ; + let expr22_0 = C::gpr_to_gpr_mem(ctx, expr5_0); + let expr23_0 = + constructor_cmove(ctx, expr20_0, &expr21_0, &expr22_0, expr16_0)?; + let expr24_0 = constructor_with_flags_reg(ctx, &expr19_0, &expr23_0)?; + let expr25_0 = C::gpr_new(ctx, expr24_0); + let expr26_0 
= C::gpr_to_reg(ctx, expr25_0); + let expr27_0: Type = I64; + let expr28_0: u64 = 0; + let expr29_0 = constructor_imm(ctx, expr27_0, expr28_0)?; + let expr30_0 = C::value_regs(ctx, expr26_0, expr29_0); + let expr31_0 = C::output(ctx, expr30_0); + return Some(expr31_0); + } + &Opcode::Ctz => { + // Rule at src/isa/x64/lower.isle line 1595. + let expr0_0: Type = I64; + let expr1_0: Type = I64; + let expr2_0 = C::put_in_regs(ctx, pattern5_1); + let expr3_0: usize = 0; + let expr4_0 = constructor_value_regs_get_gpr(ctx, expr2_0, expr3_0)?; + let expr5_0 = constructor_do_ctz(ctx, expr0_0, expr1_0, expr4_0)?; + let expr6_0: Type = I64; + let expr7_0: Type = I64; + let expr8_0: Type = I64; + let expr9_0 = C::put_in_regs(ctx, pattern5_1); + let expr10_0: usize = 1; + let expr11_0 = constructor_value_regs_get_gpr(ctx, expr9_0, expr10_0)?; + let expr12_0 = constructor_do_ctz(ctx, expr7_0, expr8_0, expr11_0)?; + let expr13_0: u32 = 64; + let expr14_0 = RegMemImm::Imm { simm32: expr13_0 }; + let expr15_0 = C::gpr_mem_imm_new(ctx, &expr14_0); + let expr16_0 = constructor_add(ctx, expr6_0, expr12_0, &expr15_0)?; + let expr17_0 = OperandSize::Size64; + let expr18_0: u32 = 64; + let expr19_0 = constructor_cmp_imm(ctx, &expr17_0, expr18_0, expr5_0)?; + let expr20_0: Type = I64; + let expr21_0 = CC::Z; + let expr22_0 = C::gpr_to_gpr_mem(ctx, expr16_0); + let expr23_0 = + constructor_cmove(ctx, expr20_0, &expr21_0, &expr22_0, expr5_0)?; + let expr24_0 = constructor_with_flags_reg(ctx, &expr19_0, &expr23_0)?; + let expr25_0 = C::gpr_new(ctx, expr24_0); + let expr26_0 = C::gpr_to_reg(ctx, expr25_0); + let expr27_0: Type = I64; + let expr28_0: u64 = 0; + let expr29_0 = constructor_imm(ctx, expr27_0, expr28_0)?; + let expr30_0 = C::value_regs(ctx, expr26_0, expr29_0); + let expr31_0 = C::output(ctx, expr30_0); + return Some(expr31_0); + } + &Opcode::Popcnt => { + // Rule at src/isa/x64/lower.isle line 1647. 
+ let expr0_0: Type = I64; + let expr1_0 = C::put_in_regs(ctx, pattern5_1); + let expr2_0: usize = 0; + let expr3_0 = constructor_value_regs_get_gpr(ctx, expr1_0, expr2_0)?; + let expr4_0 = constructor_do_popcnt(ctx, expr0_0, expr3_0)?; + let expr5_0: Type = I64; + let expr6_0 = C::put_in_regs(ctx, pattern5_1); + let expr7_0: usize = 1; + let expr8_0 = constructor_value_regs_get_gpr(ctx, expr6_0, expr7_0)?; + let expr9_0 = constructor_do_popcnt(ctx, expr5_0, expr8_0)?; + let expr10_0: Type = I64; + let expr11_0 = C::gpr_to_gpr_mem_imm(ctx, expr9_0); + let expr12_0 = constructor_add(ctx, expr10_0, expr4_0, &expr11_0)?; + let expr13_0 = C::gpr_to_reg(ctx, expr12_0); + let expr14_0: Type = I64; + let expr15_0: u64 = 0; + let expr16_0 = constructor_imm(ctx, expr14_0, expr15_0)?; + let expr17_0 = C::value_regs(ctx, expr13_0, expr16_0); + let expr18_0 = C::output(ctx, expr17_0); + return Some(expr18_0); + } + _ => {} } } &InstructionData::BinaryImm64 { @@ -4049,6 +4793,36 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + // Rule at src/isa/x64/lower.isle line 1754. 
+ let expr0_0 = C::popcount_4bit_table(ctx); + let expr1_0: Type = I8X16; + let expr2_0 = C::popcount_low_mask(ctx); + let expr3_0 = constructor_xmm_load_const(ctx, expr1_0, expr2_0)?; + let expr4_0: Type = I8X16; + let expr5_0 = constructor_put_in_xmm(ctx, pattern5_1)?; + let expr6_0 = C::xmm_to_xmm_mem(ctx, expr3_0); + let expr7_0 = constructor_sse_and(ctx, expr4_0, expr5_0, &expr6_0)?; + let expr8_0 = constructor_put_in_xmm(ctx, pattern5_1)?; + let expr9_0: u32 = 4; + let expr10_0 = RegMemImm::Imm { simm32: expr9_0 }; + let expr11_0 = constructor_mov_rmi_to_xmm(ctx, &expr10_0)?; + let expr12_0 = constructor_psrlw(ctx, expr8_0, &expr11_0)?; + let expr13_0: Type = I8X16; + let expr14_0 = C::xmm_to_xmm_mem(ctx, expr3_0); + let expr15_0 = constructor_sse_and(ctx, expr13_0, expr12_0, &expr14_0)?; + let expr16_0: Type = I8X16; + let expr17_0 = C::popcount_4bit_table(ctx); + let expr18_0 = constructor_xmm_load_const(ctx, expr16_0, expr17_0)?; + let expr19_0 = C::xmm_to_xmm_mem(ctx, expr7_0); + let expr20_0 = constructor_pshufb(ctx, expr18_0, &expr19_0)?; + let expr21_0 = C::xmm_to_xmm_mem(ctx, expr15_0); + let expr22_0 = constructor_pshufb(ctx, expr18_0, &expr21_0)?; + let expr23_0 = C::xmm_to_xmm_mem(ctx, expr22_0); + let expr24_0 = constructor_paddb(ctx, expr20_0, &expr23_0)?; + let expr25_0 = constructor_output_xmm(ctx, expr24_0)?; + return Some(expr25_0); + } _ => {} } } @@ -6243,152 +7017,242 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { - let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, pattern5_1); - if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) { - let pattern9_0 = C::inst_data(ctx, pattern8_0); - if let &InstructionData::UnaryImm { - opcode: ref pattern10_0, - imm: pattern10_1, - } = &pattern9_0 - { - if let &Opcode::Iconst = pattern10_0 { - let pattern12_0 = C::u64_from_imm64(ctx, pattern10_1); - // Rule at src/isa/x64/lower.isle line 823. 
- let expr0_0 = constructor_put_in_gpr(ctx, pattern7_0)?; - let expr1_0 = - C::const_to_type_masked_imm8(ctx, pattern12_0, pattern3_0); - let expr2_0 = - constructor_x64_rotl(ctx, pattern3_0, expr0_0, &expr1_0)?; - let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; - return Some(expr3_0); + match &pattern4_0 { + &InstructionData::Binary { + opcode: ref pattern5_0, + args: ref pattern5_1, + } => { + match pattern5_0 { + &Opcode::Rotl => { + let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, pattern5_1); + if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) { + let pattern9_0 = C::inst_data(ctx, pattern8_0); + if let &InstructionData::UnaryImm { + opcode: ref pattern10_0, + imm: pattern10_1, + } = &pattern9_0 + { + if let &Opcode::Iconst = pattern10_0 { + let pattern12_0 = C::u64_from_imm64(ctx, pattern10_1); + // Rule at src/isa/x64/lower.isle line 823. + let expr0_0 = constructor_put_in_gpr(ctx, pattern7_0)?; + let expr1_0 = C::const_to_type_masked_imm8( + ctx, + pattern12_0, + pattern3_0, + ); + let expr2_0 = constructor_x64_rotl( + ctx, pattern3_0, expr0_0, &expr1_0, + )?; + let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; + return Some(expr3_0); + } } } + // Rule at src/isa/x64/lower.isle line 817. + let expr0_0 = constructor_lo_gpr(ctx, pattern7_1)?; + let expr1_0 = constructor_put_in_gpr(ctx, pattern7_0)?; + let expr2_0 = C::gpr_to_imm8_gpr(ctx, expr0_0); + let expr3_0 = constructor_x64_rotl(ctx, pattern3_0, expr1_0, &expr2_0)?; + let expr4_0 = constructor_output_gpr(ctx, expr3_0)?; + return Some(expr4_0); } - // Rule at src/isa/x64/lower.isle line 817. 
- let expr0_0 = constructor_lo_gpr(ctx, pattern7_1)?; - let expr1_0 = constructor_put_in_gpr(ctx, pattern7_0)?; - let expr2_0 = C::gpr_to_imm8_gpr(ctx, expr0_0); - let expr3_0 = constructor_x64_rotl(ctx, pattern3_0, expr1_0, &expr2_0)?; - let expr4_0 = constructor_output_gpr(ctx, expr3_0)?; - return Some(expr4_0); - } - &Opcode::Rotr => { - let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, pattern5_1); - if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) { - let pattern9_0 = C::inst_data(ctx, pattern8_0); - if let &InstructionData::UnaryImm { - opcode: ref pattern10_0, - imm: pattern10_1, - } = &pattern9_0 - { - if let &Opcode::Iconst = pattern10_0 { - let pattern12_0 = C::u64_from_imm64(ctx, pattern10_1); - // Rule at src/isa/x64/lower.isle line 863. - let expr0_0 = constructor_put_in_gpr(ctx, pattern7_0)?; - let expr1_0 = - C::const_to_type_masked_imm8(ctx, pattern12_0, pattern3_0); - let expr2_0 = - constructor_x64_rotr(ctx, pattern3_0, expr0_0, &expr1_0)?; - let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; - return Some(expr3_0); + &Opcode::Rotr => { + let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, pattern5_1); + if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) { + let pattern9_0 = C::inst_data(ctx, pattern8_0); + if let &InstructionData::UnaryImm { + opcode: ref pattern10_0, + imm: pattern10_1, + } = &pattern9_0 + { + if let &Opcode::Iconst = pattern10_0 { + let pattern12_0 = C::u64_from_imm64(ctx, pattern10_1); + // Rule at src/isa/x64/lower.isle line 863. + let expr0_0 = constructor_put_in_gpr(ctx, pattern7_0)?; + let expr1_0 = C::const_to_type_masked_imm8( + ctx, + pattern12_0, + pattern3_0, + ); + let expr2_0 = constructor_x64_rotr( + ctx, pattern3_0, expr0_0, &expr1_0, + )?; + let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; + return Some(expr3_0); + } } } + // Rule at src/isa/x64/lower.isle line 857. 
+ let expr0_0 = constructor_lo_gpr(ctx, pattern7_1)?; + let expr1_0 = constructor_put_in_gpr(ctx, pattern7_0)?; + let expr2_0 = C::gpr_to_imm8_gpr(ctx, expr0_0); + let expr3_0 = constructor_x64_rotr(ctx, pattern3_0, expr1_0, &expr2_0)?; + let expr4_0 = constructor_output_gpr(ctx, expr3_0)?; + return Some(expr4_0); } - // Rule at src/isa/x64/lower.isle line 857. - let expr0_0 = constructor_lo_gpr(ctx, pattern7_1)?; - let expr1_0 = constructor_put_in_gpr(ctx, pattern7_0)?; - let expr2_0 = C::gpr_to_imm8_gpr(ctx, expr0_0); - let expr3_0 = constructor_x64_rotr(ctx, pattern3_0, expr1_0, &expr2_0)?; - let expr4_0 = constructor_output_gpr(ctx, expr3_0)?; - return Some(expr4_0); + _ => {} } - _ => {} } + &InstructionData::Unary { + opcode: ref pattern5_0, + arg: pattern5_1, + } => { + match pattern5_0 { + &Opcode::Clz => { + // Rule at src/isa/x64/lower.isle line 1543. + let expr0_0 = constructor_put_in_gpr(ctx, pattern5_1)?; + let expr1_0 = constructor_do_clz(ctx, pattern3_0, pattern3_0, expr0_0)?; + let expr2_0 = constructor_output_gpr(ctx, expr1_0)?; + return Some(expr2_0); + } + &Opcode::Ctz => { + // Rule at src/isa/x64/lower.isle line 1585. + let expr0_0 = constructor_put_in_gpr(ctx, pattern5_1)?; + let expr1_0 = constructor_do_ctz(ctx, pattern3_0, pattern3_0, expr0_0)?; + let expr2_0 = constructor_output_gpr(ctx, expr1_0)?; + return Some(expr2_0); + } + &Opcode::Popcnt => { + // Rule at src/isa/x64/lower.isle line 1637. 
+ let expr0_0 = constructor_put_in_gpr(ctx, pattern5_1)?; + let expr1_0 = constructor_do_popcnt(ctx, pattern3_0, expr0_0)?; + let expr2_0 = constructor_output_gpr(ctx, expr1_0)?; + return Some(expr2_0); + } + _ => {} + } + } + _ => {} } } if let Some(pattern3_0) = C::ty_8_or_16(ctx, pattern2_0) { let pattern4_0 = C::inst_data(ctx, pattern0_0); - if let &InstructionData::Binary { - opcode: ref pattern5_0, - args: ref pattern5_1, - } = &pattern4_0 - { - match pattern5_0 { - &Opcode::Rotl => { - let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, pattern5_1); - if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) { - let pattern9_0 = C::inst_data(ctx, pattern8_0); - if let &InstructionData::UnaryImm { - opcode: ref pattern10_0, - imm: pattern10_1, - } = &pattern9_0 - { - if let &Opcode::Iconst = pattern10_0 { - let pattern12_0 = C::u64_from_imm64(ctx, pattern10_1); - // Rule at src/isa/x64/lower.isle line 809. - let expr0_0 = constructor_put_in_gpr(ctx, pattern7_0)?; - let expr1_0 = - C::const_to_type_masked_imm8(ctx, pattern12_0, pattern3_0); - let expr2_0 = - constructor_x64_rotl(ctx, pattern3_0, expr0_0, &expr1_0)?; - let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; - return Some(expr3_0); + match &pattern4_0 { + &InstructionData::Binary { + opcode: ref pattern5_0, + args: ref pattern5_1, + } => { + match pattern5_0 { + &Opcode::Rotl => { + let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, pattern5_1); + if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) { + let pattern9_0 = C::inst_data(ctx, pattern8_0); + if let &InstructionData::UnaryImm { + opcode: ref pattern10_0, + imm: pattern10_1, + } = &pattern9_0 + { + if let &Opcode::Iconst = pattern10_0 { + let pattern12_0 = C::u64_from_imm64(ctx, pattern10_1); + // Rule at src/isa/x64/lower.isle line 809. 
+ let expr0_0 = constructor_put_in_gpr(ctx, pattern7_0)?; + let expr1_0 = C::const_to_type_masked_imm8( + ctx, + pattern12_0, + pattern3_0, + ); + let expr2_0 = constructor_x64_rotl( + ctx, pattern3_0, expr0_0, &expr1_0, + )?; + let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; + return Some(expr3_0); + } } } + // Rule at src/isa/x64/lower.isle line 805. + let expr0_0: Type = I32; + let expr1_0 = ExtendKind::Zero; + let expr2_0 = + constructor_extend_to_gpr(ctx, pattern7_1, expr0_0, &expr1_0)?; + let expr3_0 = constructor_put_in_gpr(ctx, pattern7_0)?; + let expr4_0 = C::gpr_to_imm8_gpr(ctx, expr2_0); + let expr5_0 = constructor_x64_rotl(ctx, pattern3_0, expr3_0, &expr4_0)?; + let expr6_0 = constructor_output_gpr(ctx, expr5_0)?; + return Some(expr6_0); } - // Rule at src/isa/x64/lower.isle line 805. - let expr0_0: Type = I32; - let expr1_0 = ExtendKind::Zero; - let expr2_0 = - constructor_extend_to_gpr(ctx, pattern7_1, expr0_0, &expr1_0)?; - let expr3_0 = constructor_put_in_gpr(ctx, pattern7_0)?; - let expr4_0 = C::gpr_to_imm8_gpr(ctx, expr2_0); - let expr5_0 = constructor_x64_rotl(ctx, pattern3_0, expr3_0, &expr4_0)?; - let expr6_0 = constructor_output_gpr(ctx, expr5_0)?; - return Some(expr6_0); - } - &Opcode::Rotr => { - let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, pattern5_1); - if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) { - let pattern9_0 = C::inst_data(ctx, pattern8_0); - if let &InstructionData::UnaryImm { - opcode: ref pattern10_0, - imm: pattern10_1, - } = &pattern9_0 - { - if let &Opcode::Iconst = pattern10_0 { - let pattern12_0 = C::u64_from_imm64(ctx, pattern10_1); - // Rule at src/isa/x64/lower.isle line 849. 
- let expr0_0 = constructor_put_in_gpr(ctx, pattern7_0)?; - let expr1_0 = - C::const_to_type_masked_imm8(ctx, pattern12_0, pattern3_0); - let expr2_0 = - constructor_x64_rotr(ctx, pattern3_0, expr0_0, &expr1_0)?; - let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; - return Some(expr3_0); + &Opcode::Rotr => { + let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, pattern5_1); + if let Some(pattern8_0) = C::def_inst(ctx, pattern7_1) { + let pattern9_0 = C::inst_data(ctx, pattern8_0); + if let &InstructionData::UnaryImm { + opcode: ref pattern10_0, + imm: pattern10_1, + } = &pattern9_0 + { + if let &Opcode::Iconst = pattern10_0 { + let pattern12_0 = C::u64_from_imm64(ctx, pattern10_1); + // Rule at src/isa/x64/lower.isle line 849. + let expr0_0 = constructor_put_in_gpr(ctx, pattern7_0)?; + let expr1_0 = C::const_to_type_masked_imm8( + ctx, + pattern12_0, + pattern3_0, + ); + let expr2_0 = constructor_x64_rotr( + ctx, pattern3_0, expr0_0, &expr1_0, + )?; + let expr3_0 = constructor_output_gpr(ctx, expr2_0)?; + return Some(expr3_0); + } } } + // Rule at src/isa/x64/lower.isle line 845. + let expr0_0: Type = I32; + let expr1_0 = ExtendKind::Zero; + let expr2_0 = + constructor_extend_to_gpr(ctx, pattern7_1, expr0_0, &expr1_0)?; + let expr3_0 = constructor_put_in_gpr(ctx, pattern7_0)?; + let expr4_0 = C::gpr_to_imm8_gpr(ctx, expr2_0); + let expr5_0 = constructor_x64_rotr(ctx, pattern3_0, expr3_0, &expr4_0)?; + let expr6_0 = constructor_output_gpr(ctx, expr5_0)?; + return Some(expr6_0); } - // Rule at src/isa/x64/lower.isle line 845. 
- let expr0_0: Type = I32; - let expr1_0 = ExtendKind::Zero; - let expr2_0 = - constructor_extend_to_gpr(ctx, pattern7_1, expr0_0, &expr1_0)?; - let expr3_0 = constructor_put_in_gpr(ctx, pattern7_0)?; - let expr4_0 = C::gpr_to_imm8_gpr(ctx, expr2_0); - let expr5_0 = constructor_x64_rotr(ctx, pattern3_0, expr3_0, &expr4_0)?; - let expr6_0 = constructor_output_gpr(ctx, expr5_0)?; - return Some(expr6_0); + _ => {} } - _ => {} } + &InstructionData::Unary { + opcode: ref pattern5_0, + arg: pattern5_1, + } => { + match pattern5_0 { + &Opcode::Clz => { + // Rule at src/isa/x64/lower.isle line 1548. + let expr0_0: Type = I32; + let expr1_0: Type = I32; + let expr2_0 = ExtendKind::Zero; + let expr3_0 = + constructor_extend_to_gpr(ctx, pattern5_1, expr1_0, &expr2_0)?; + let expr4_0 = constructor_do_clz(ctx, expr0_0, pattern3_0, expr3_0)?; + let expr5_0 = constructor_output_gpr(ctx, expr4_0)?; + return Some(expr5_0); + } + &Opcode::Ctz => { + // Rule at src/isa/x64/lower.isle line 1590. + let expr0_0: Type = I32; + let expr1_0: Type = I32; + let expr2_0 = ExtendKind::Zero; + let expr3_0 = + constructor_extend_to_gpr(ctx, pattern5_1, expr1_0, &expr2_0)?; + let expr4_0 = constructor_do_ctz(ctx, expr0_0, pattern3_0, expr3_0)?; + let expr5_0 = constructor_output_gpr(ctx, expr4_0)?; + return Some(expr5_0); + } + &Opcode::Popcnt => { + // Rule at src/isa/x64/lower.isle line 1642. + let expr0_0: Type = I32; + let expr1_0: Type = I32; + let expr2_0 = ExtendKind::Zero; + let expr3_0 = + constructor_extend_to_gpr(ctx, pattern5_1, expr1_0, &expr2_0)?; + let expr4_0 = constructor_do_popcnt(ctx, expr0_0, expr3_0)?; + let expr5_0 = constructor_output_gpr(ctx, expr4_0)?; + return Some(expr5_0); + } + _ => {} + } + } + _ => {} } } } @@ -6996,3 +7860,335 @@ pub fn constructor_cmp_and_choose( } return None; } + +// Generated as internal constructor for term do_clz. 
+pub fn constructor_do_clz( + ctx: &mut C, + arg0: Type, + arg1: Type, + arg2: Gpr, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/x64/lower.isle line 1568. + let expr0_0: Type = I64; + let expr1_0: i64 = -1; + let expr2_0 = constructor_imm_i64(ctx, expr0_0, expr1_0)?; + let expr3_0 = C::gpr_new(ctx, expr2_0); + let expr4_0 = constructor_bsr_or_else(ctx, pattern0_0, pattern2_0, expr3_0)?; + let expr5_0 = C::gpr_to_reg(ctx, expr4_0); + let expr6_0 = C::ty_bits_u64(ctx, pattern1_0); + let expr7_0: u64 = 1; + let expr8_0 = C::u64_sub(ctx, expr6_0, expr7_0); + let expr9_0 = constructor_imm(ctx, pattern0_0, expr8_0)?; + let expr10_0 = C::gpr_new(ctx, expr9_0); + let expr11_0 = constructor_reg_to_gpr_mem_imm(ctx, expr5_0)?; + let expr12_0 = constructor_sub(ctx, pattern0_0, expr10_0, &expr11_0)?; + return Some(expr12_0); +} + +// Generated as internal constructor for term do_ctz. +pub fn constructor_do_ctz( + ctx: &mut C, + arg0: Type, + arg1: Type, + arg2: Gpr, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/x64/lower.isle line 1609. + let expr0_0: Type = I64; + let expr1_0 = C::ty_bits_u64(ctx, pattern1_0); + let expr2_0 = constructor_imm(ctx, expr0_0, expr1_0)?; + let expr3_0 = C::gpr_new(ctx, expr2_0); + let expr4_0 = constructor_bsf_or_else(ctx, pattern0_0, pattern2_0, expr3_0)?; + return Some(expr4_0); +} + +// Generated as internal constructor for term do_popcnt. +pub fn constructor_do_popcnt(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + if pattern0_0 == I32 { + let pattern2_0 = arg1; + // Rule at src/isa/x64/lower.isle line 1700. 
+ let expr0_0: Type = I32; + let expr1_0: u8 = 1; + let expr2_0 = Imm8Reg::Imm8 { imm: expr1_0 }; + let expr3_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr2_0); + let expr4_0 = constructor_shr(ctx, expr0_0, pattern2_0, &expr3_0)?; + let expr5_0: Type = I32; + let expr6_0: u64 = 2004318071; + let expr7_0 = constructor_imm(ctx, expr5_0, expr6_0)?; + let expr8_0 = C::gpr_new(ctx, expr7_0); + let expr9_0: Type = I32; + let expr10_0 = C::gpr_to_gpr_mem_imm(ctx, expr8_0); + let expr11_0 = constructor_x64_and(ctx, expr9_0, expr4_0, &expr10_0)?; + let expr12_0: Type = I32; + let expr13_0 = C::gpr_to_gpr_mem_imm(ctx, expr11_0); + let expr14_0 = constructor_sub(ctx, expr12_0, pattern2_0, &expr13_0)?; + let expr15_0: Type = I32; + let expr16_0: u8 = 1; + let expr17_0 = Imm8Reg::Imm8 { imm: expr16_0 }; + let expr18_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr17_0); + let expr19_0 = constructor_shr(ctx, expr15_0, expr11_0, &expr18_0)?; + let expr20_0: Type = I32; + let expr21_0 = C::gpr_to_gpr_mem_imm(ctx, expr8_0); + let expr22_0 = constructor_x64_and(ctx, expr20_0, expr19_0, &expr21_0)?; + let expr23_0: Type = I32; + let expr24_0 = C::gpr_to_gpr_mem_imm(ctx, expr22_0); + let expr25_0 = constructor_sub(ctx, expr23_0, expr14_0, &expr24_0)?; + let expr26_0: Type = I32; + let expr27_0: u8 = 1; + let expr28_0 = Imm8Reg::Imm8 { imm: expr27_0 }; + let expr29_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr28_0); + let expr30_0 = constructor_shr(ctx, expr26_0, expr22_0, &expr29_0)?; + let expr31_0: Type = I32; + let expr32_0 = C::gpr_to_gpr_mem_imm(ctx, expr8_0); + let expr33_0 = constructor_x64_and(ctx, expr31_0, expr30_0, &expr32_0)?; + let expr34_0: Type = I32; + let expr35_0 = C::gpr_to_gpr_mem_imm(ctx, expr33_0); + let expr36_0 = constructor_sub(ctx, expr34_0, expr25_0, &expr35_0)?; + let expr37_0: Type = I32; + let expr38_0: Type = I32; + let expr39_0: u8 = 4; + let expr40_0 = Imm8Reg::Imm8 { imm: expr39_0 }; + let expr41_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr40_0); + let expr42_0 = 
constructor_shr(ctx, expr38_0, expr36_0, &expr41_0)?; + let expr43_0 = C::gpr_to_gpr_mem_imm(ctx, expr36_0); + let expr44_0 = constructor_add(ctx, expr37_0, expr42_0, &expr43_0)?; + let expr45_0: Type = I32; + let expr46_0: u32 = 252645135; + let expr47_0 = RegMemImm::Imm { simm32: expr46_0 }; + let expr48_0 = C::gpr_mem_imm_new(ctx, &expr47_0); + let expr49_0 = constructor_x64_and(ctx, expr45_0, expr44_0, &expr48_0)?; + let expr50_0: Type = I32; + let expr51_0: u32 = 16843009; + let expr52_0 = RegMemImm::Imm { simm32: expr51_0 }; + let expr53_0 = C::gpr_mem_imm_new(ctx, &expr52_0); + let expr54_0 = constructor_mul(ctx, expr50_0, expr49_0, &expr53_0)?; + let expr55_0: Type = I32; + let expr56_0: u8 = 24; + let expr57_0 = Imm8Reg::Imm8 { imm: expr56_0 }; + let expr58_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr57_0); + let expr59_0 = constructor_shr(ctx, expr55_0, expr54_0, &expr58_0)?; + return Some(expr59_0); + } + if pattern0_0 == I64 { + let pattern2_0 = arg1; + // Rule at src/isa/x64/lower.isle line 1657. 
+ let expr0_0: Type = I64; + let expr1_0: u8 = 1; + let expr2_0 = Imm8Reg::Imm8 { imm: expr1_0 }; + let expr3_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr2_0); + let expr4_0 = constructor_shr(ctx, expr0_0, pattern2_0, &expr3_0)?; + let expr5_0: Type = I64; + let expr6_0: u64 = 8608480567731124087; + let expr7_0 = constructor_imm(ctx, expr5_0, expr6_0)?; + let expr8_0 = C::gpr_new(ctx, expr7_0); + let expr9_0: Type = I64; + let expr10_0 = C::gpr_to_gpr_mem_imm(ctx, expr8_0); + let expr11_0 = constructor_x64_and(ctx, expr9_0, expr4_0, &expr10_0)?; + let expr12_0: Type = I64; + let expr13_0 = C::gpr_to_gpr_mem_imm(ctx, expr11_0); + let expr14_0 = constructor_sub(ctx, expr12_0, pattern2_0, &expr13_0)?; + let expr15_0: Type = I64; + let expr16_0: u8 = 1; + let expr17_0 = Imm8Reg::Imm8 { imm: expr16_0 }; + let expr18_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr17_0); + let expr19_0 = constructor_shr(ctx, expr15_0, expr11_0, &expr18_0)?; + let expr20_0: Type = I64; + let expr21_0 = C::gpr_to_gpr_mem_imm(ctx, expr8_0); + let expr22_0 = constructor_x64_and(ctx, expr20_0, expr19_0, &expr21_0)?; + let expr23_0: Type = I64; + let expr24_0 = C::gpr_to_gpr_mem_imm(ctx, expr22_0); + let expr25_0 = constructor_sub(ctx, expr23_0, expr14_0, &expr24_0)?; + let expr26_0: Type = I64; + let expr27_0: u8 = 1; + let expr28_0 = Imm8Reg::Imm8 { imm: expr27_0 }; + let expr29_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr28_0); + let expr30_0 = constructor_shr(ctx, expr26_0, expr22_0, &expr29_0)?; + let expr31_0: Type = I64; + let expr32_0 = C::gpr_to_gpr_mem_imm(ctx, expr8_0); + let expr33_0 = constructor_x64_and(ctx, expr31_0, expr30_0, &expr32_0)?; + let expr34_0: Type = I64; + let expr35_0 = C::gpr_to_gpr_mem_imm(ctx, expr33_0); + let expr36_0 = constructor_sub(ctx, expr34_0, expr25_0, &expr35_0)?; + let expr37_0: Type = I64; + let expr38_0: Type = I64; + let expr39_0: u8 = 4; + let expr40_0 = Imm8Reg::Imm8 { imm: expr39_0 }; + let expr41_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr40_0); + let expr42_0 = 
constructor_shr(ctx, expr38_0, expr36_0, &expr41_0)?; + let expr43_0 = C::gpr_to_gpr_mem_imm(ctx, expr36_0); + let expr44_0 = constructor_add(ctx, expr37_0, expr42_0, &expr43_0)?; + let expr45_0: Type = I64; + let expr46_0: u64 = 1085102592571150095; + let expr47_0 = constructor_imm(ctx, expr45_0, expr46_0)?; + let expr48_0 = C::gpr_new(ctx, expr47_0); + let expr49_0: Type = I64; + let expr50_0 = C::gpr_to_gpr_mem_imm(ctx, expr48_0); + let expr51_0 = constructor_x64_and(ctx, expr49_0, expr44_0, &expr50_0)?; + let expr52_0: Type = I64; + let expr53_0: u64 = 72340172838076673; + let expr54_0 = constructor_imm(ctx, expr52_0, expr53_0)?; + let expr55_0 = C::gpr_new(ctx, expr54_0); + let expr56_0: Type = I64; + let expr57_0 = C::gpr_to_gpr_mem_imm(ctx, expr55_0); + let expr58_0 = constructor_mul(ctx, expr56_0, expr51_0, &expr57_0)?; + let expr59_0: Type = I64; + let expr60_0: u8 = 56; + let expr61_0 = Imm8Reg::Imm8 { imm: expr60_0 }; + let expr62_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr61_0); + let expr63_0 = constructor_shr(ctx, expr59_0, expr58_0, &expr62_0)?; + return Some(expr63_0); + } + return None; +} + +// Generated as internal constructor for term do_bitrev8. +pub fn constructor_do_bitrev8(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/lower.isle line 1789. 
+ let expr0_0 = C::ty_mask(ctx, pattern0_0); + let expr1_0: u64 = 6148914691236517205; + let expr2_0 = C::u64_and(ctx, expr0_0, expr1_0); + let expr3_0 = constructor_imm(ctx, pattern0_0, expr2_0)?; + let expr4_0 = C::gpr_new(ctx, expr3_0); + let expr5_0 = C::gpr_to_gpr_mem_imm(ctx, expr4_0); + let expr6_0 = constructor_x64_and(ctx, pattern0_0, pattern1_0, &expr5_0)?; + let expr7_0: u8 = 1; + let expr8_0 = Imm8Reg::Imm8 { imm: expr7_0 }; + let expr9_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr8_0); + let expr10_0 = constructor_shr(ctx, pattern0_0, pattern1_0, &expr9_0)?; + let expr11_0 = C::gpr_to_gpr_mem_imm(ctx, expr4_0); + let expr12_0 = constructor_x64_and(ctx, pattern0_0, expr10_0, &expr11_0)?; + let expr13_0: u8 = 1; + let expr14_0 = Imm8Reg::Imm8 { imm: expr13_0 }; + let expr15_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr14_0); + let expr16_0 = constructor_shl(ctx, pattern0_0, expr6_0, &expr15_0)?; + let expr17_0 = C::gpr_to_gpr_mem_imm(ctx, expr12_0); + let expr18_0 = constructor_or(ctx, pattern0_0, expr16_0, &expr17_0)?; + let expr19_0: u64 = 3689348814741910323; + let expr20_0 = C::u64_and(ctx, expr0_0, expr19_0); + let expr21_0 = constructor_imm(ctx, pattern0_0, expr20_0)?; + let expr22_0 = C::gpr_new(ctx, expr21_0); + let expr23_0 = C::gpr_to_gpr_mem_imm(ctx, expr22_0); + let expr24_0 = constructor_x64_and(ctx, pattern0_0, expr18_0, &expr23_0)?; + let expr25_0: u8 = 2; + let expr26_0 = Imm8Reg::Imm8 { imm: expr25_0 }; + let expr27_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr26_0); + let expr28_0 = constructor_shr(ctx, pattern0_0, expr18_0, &expr27_0)?; + let expr29_0 = C::gpr_to_gpr_mem_imm(ctx, expr22_0); + let expr30_0 = constructor_x64_and(ctx, pattern0_0, expr28_0, &expr29_0)?; + let expr31_0: u8 = 2; + let expr32_0 = Imm8Reg::Imm8 { imm: expr31_0 }; + let expr33_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr32_0); + let expr34_0 = constructor_shl(ctx, pattern0_0, expr24_0, &expr33_0)?; + let expr35_0 = C::gpr_to_gpr_mem_imm(ctx, expr30_0); + let expr36_0 = 
constructor_or(ctx, pattern0_0, expr34_0, &expr35_0)?; + let expr37_0: u64 = 1085102592571150095; + let expr38_0 = C::u64_and(ctx, expr0_0, expr37_0); + let expr39_0 = constructor_imm(ctx, pattern0_0, expr38_0)?; + let expr40_0 = C::gpr_new(ctx, expr39_0); + let expr41_0 = C::gpr_to_gpr_mem_imm(ctx, expr40_0); + let expr42_0 = constructor_x64_and(ctx, pattern0_0, expr36_0, &expr41_0)?; + let expr43_0: u8 = 4; + let expr44_0 = Imm8Reg::Imm8 { imm: expr43_0 }; + let expr45_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr44_0); + let expr46_0 = constructor_shr(ctx, pattern0_0, expr36_0, &expr45_0)?; + let expr47_0 = C::gpr_to_gpr_mem_imm(ctx, expr40_0); + let expr48_0 = constructor_x64_and(ctx, pattern0_0, expr46_0, &expr47_0)?; + let expr49_0: u8 = 4; + let expr50_0 = Imm8Reg::Imm8 { imm: expr49_0 }; + let expr51_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr50_0); + let expr52_0 = constructor_shl(ctx, pattern0_0, expr42_0, &expr51_0)?; + let expr53_0 = C::gpr_to_gpr_mem_imm(ctx, expr48_0); + let expr54_0 = constructor_or(ctx, pattern0_0, expr52_0, &expr53_0)?; + return Some(expr54_0); +} + +// Generated as internal constructor for term do_bitrev16. +pub fn constructor_do_bitrev16(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/lower.isle line 1812. 
+ let expr0_0 = constructor_do_bitrev8(ctx, pattern0_0, pattern1_0)?; + let expr1_0 = C::ty_mask(ctx, pattern0_0); + let expr2_0: u64 = 71777214294589695; + let expr3_0 = C::u64_and(ctx, expr1_0, expr2_0); + let expr4_0 = constructor_imm(ctx, pattern0_0, expr3_0)?; + let expr5_0 = C::gpr_new(ctx, expr4_0); + let expr6_0 = C::gpr_to_gpr_mem_imm(ctx, expr5_0); + let expr7_0 = constructor_x64_and(ctx, pattern0_0, expr0_0, &expr6_0)?; + let expr8_0: u8 = 8; + let expr9_0 = Imm8Reg::Imm8 { imm: expr8_0 }; + let expr10_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr9_0); + let expr11_0 = constructor_shr(ctx, pattern0_0, expr0_0, &expr10_0)?; + let expr12_0 = C::gpr_to_gpr_mem_imm(ctx, expr5_0); + let expr13_0 = constructor_x64_and(ctx, pattern0_0, expr11_0, &expr12_0)?; + let expr14_0: u8 = 8; + let expr15_0 = Imm8Reg::Imm8 { imm: expr14_0 }; + let expr16_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr15_0); + let expr17_0 = constructor_shl(ctx, pattern0_0, expr7_0, &expr16_0)?; + let expr18_0 = C::gpr_to_gpr_mem_imm(ctx, expr13_0); + let expr19_0 = constructor_or(ctx, pattern0_0, expr17_0, &expr18_0)?; + return Some(expr19_0); +} + +// Generated as internal constructor for term do_bitrev32. +pub fn constructor_do_bitrev32(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/lower.isle line 1824. 
+ let expr0_0 = constructor_do_bitrev16(ctx, pattern0_0, pattern1_0)?; + let expr1_0 = C::ty_mask(ctx, pattern0_0); + let expr2_0: u64 = 281470681808895; + let expr3_0 = C::u64_and(ctx, expr1_0, expr2_0); + let expr4_0 = constructor_imm(ctx, pattern0_0, expr3_0)?; + let expr5_0 = C::gpr_new(ctx, expr4_0); + let expr6_0 = C::gpr_to_gpr_mem_imm(ctx, expr5_0); + let expr7_0 = constructor_x64_and(ctx, pattern0_0, expr0_0, &expr6_0)?; + let expr8_0: u8 = 16; + let expr9_0 = Imm8Reg::Imm8 { imm: expr8_0 }; + let expr10_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr9_0); + let expr11_0 = constructor_shr(ctx, pattern0_0, expr0_0, &expr10_0)?; + let expr12_0 = C::gpr_to_gpr_mem_imm(ctx, expr5_0); + let expr13_0 = constructor_x64_and(ctx, pattern0_0, expr11_0, &expr12_0)?; + let expr14_0: u8 = 16; + let expr15_0 = Imm8Reg::Imm8 { imm: expr14_0 }; + let expr16_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr15_0); + let expr17_0 = constructor_shl(ctx, pattern0_0, expr7_0, &expr16_0)?; + let expr18_0 = C::gpr_to_gpr_mem_imm(ctx, expr13_0); + let expr19_0 = constructor_or(ctx, pattern0_0, expr17_0, &expr18_0)?; + return Some(expr19_0); +} + +// Generated as internal constructor for term do_bitrev64. +pub fn constructor_do_bitrev64(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option { + let pattern0_0 = arg0; + if pattern0_0 == I64 { + let pattern2_0 = arg1; + // Rule at src/isa/x64/lower.isle line 1836. 
+ let expr0_0 = constructor_do_bitrev32(ctx, pattern0_0, pattern2_0)?; + let expr1_0: u64 = 4294967295; + let expr2_0 = constructor_imm(ctx, pattern0_0, expr1_0)?; + let expr3_0 = C::gpr_new(ctx, expr2_0); + let expr4_0 = C::gpr_to_gpr_mem_imm(ctx, expr3_0); + let expr5_0 = constructor_x64_and(ctx, pattern0_0, expr0_0, &expr4_0)?; + let expr6_0: u8 = 32; + let expr7_0 = Imm8Reg::Imm8 { imm: expr6_0 }; + let expr8_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr7_0); + let expr9_0 = constructor_shr(ctx, pattern0_0, expr0_0, &expr8_0)?; + let expr10_0: u8 = 32; + let expr11_0 = Imm8Reg::Imm8 { imm: expr10_0 }; + let expr12_0 = C::imm8_reg_to_imm8_gpr(ctx, &expr11_0); + let expr13_0 = constructor_shl(ctx, pattern0_0, expr5_0, &expr12_0)?; + let expr14_0 = C::gpr_to_gpr_mem_imm(ctx, expr9_0); + let expr15_0 = constructor_or(ctx, pattern0_0, expr13_0, &expr14_0)?; + return Some(expr15_0); + } + return None; +} diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index a7259c6d3a..c07fc393cb 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -140,6 +140,26 @@ macro_rules! isle_prelude_methods { x.into() } + #[inline] + fn i64_as_u64(&mut self, x: i64) -> u64 { + x as u64 + } + + #[inline] + fn u64_add(&mut self, x: u64, y: u64) -> u64 { + x.wrapping_add(y) + } + + #[inline] + fn u64_sub(&mut self, x: u64, y: u64) -> u64 { + x.wrapping_sub(y) + } + + #[inline] + fn u64_and(&mut self, x: u64, y: u64) -> u64 { + x & y + } + #[inline] fn ty_bits(&mut self, ty: Type) -> u8 { use std::convert::TryInto; @@ -151,11 +171,28 @@ macro_rules! 
isle_prelude_methods { ty.bits() } + #[inline] + fn ty_bits_u64(&mut self, ty: Type) -> u64 { + ty.bits() as u64 + } + #[inline] fn ty_bytes(&mut self, ty: Type) -> u16 { u16::try_from(ty.bytes()).unwrap() } + #[inline] + fn ty_mask(&mut self, ty: Type) -> u64 { + match ty.bits() { + 1 => 1, + 8 => 0xff, + 16 => 0xffff, + 32 => 0xffff_ffff, + 64 => 0xffff_ffff_ffff_ffff, + _ => unimplemented!(), + } + } + fn fits_in_16(&mut self, ty: Type) -> Option { if ty.bits() <= 16 { Some(ty) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 0be9c856f5..c631bdc0e6 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -167,6 +167,20 @@ (decl u32_as_u64 (u32) u64) (extern constructor u32_as_u64 u32_as_u64) +(decl i64_as_u64 (i64) u64) +(extern constructor i64_as_u64 i64_as_u64) + +;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl u64_add (u64 u64) u64) +(extern constructor u64_add u64_add) + +(decl u64_sub (u64 u64) u64) +(extern constructor u64_sub u64_sub) + +(decl u64_and (u64 u64) u64) +(extern constructor u64_and u64_and) + ;;;; `cranelift_codegen::ir::Type` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (extern const $B1 Type) @@ -209,6 +223,14 @@ (decl ty_bits_u16 (Type) u16) (extern constructor ty_bits_u16 ty_bits_u16) +;; Get the bit width of a given type. +(decl ty_bits_u64 (Type) u64) +(extern constructor ty_bits_u64 ty_bits_u64) + +;; Get a mask for the width of a given type. +(decl ty_mask (Type) u64) +(extern constructor ty_mask ty_mask) + ;; Get the byte width of a given type. 
(decl ty_bytes (Type) u16) (extern constructor ty_bytes ty_bytes) @@ -398,9 +420,27 @@ (ConsumesFlagsReturnsReg (inst MInst) (result Reg)) (ConsumesFlagsTwiceReturnsValueRegs (inst1 MInst) (inst2 MInst) - (result ValueRegs)))) + (result ValueRegs)) + (ConsumesFlagsFourTimesReturnsValueRegs (inst1 MInst) + (inst2 MInst) + (inst3 MInst) + (inst4 MInst) + (result ValueRegs)))) + +;; Get the produced register out of a ProducesFlags. +(decl produces_flags_get_reg (ProducesFlags) Reg) +(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg) + +;; Modify a ProducesFlags to use it only for its side-effect, ignoring +;; its result. +(decl produces_flags_ignore (ProducesFlags) ProducesFlags) +(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsReg inst _)) + (ProducesFlags.ProducesFlagsSideEffect inst)) +(rule (produces_flags_ignore (ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst _)) + (ProducesFlags.ProducesFlagsSideEffect inst)) + ;; Helper for combining two flags-consumer instructions that return a ;; single Reg, giving a ConsumesFlags that returns both values in a ;; ValueRegs. @@ -440,12 +480,28 @@ (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consumer_inst_1 consumer_inst_2 consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. (let ((_x Unit (emit producer_inst)) - ;; Note that the order of emission here is swapped, as this seems - ;; to generate better register allocation for now with fewer - ;; `mov` instructions. 
- (_y Unit (emit consumer_inst_2)) - (_z Unit (emit consumer_inst_1))) + (_y Unit (emit consumer_inst_1)) + (_z Unit (emit consumer_inst_2))) + consumer_result)) + +(rule (with_flags (ProducesFlags.ProducesFlagsSideEffect producer_inst) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consumer_inst_1 + consumer_inst_2 + consumer_inst_3 + consumer_inst_4 + consumer_result)) + ;; We must emit these instructions in order as the creator of + ;; the ConsumesFlags may be relying on dataflow dependencies + ;; amongst them. + (let ((_x Unit (emit producer_inst)) + (_y Unit (emit consumer_inst_1)) + (_z Unit (emit consumer_inst_2)) + (_w Unit (emit consumer_inst_3)) + (_v Unit (emit consumer_inst_4))) consumer_result)) (decl with_flags_reg (ProducesFlags ConsumesFlags) Reg) diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif index 32a3bec059..69a1a189fb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif @@ -1184,7 +1184,7 @@ block0(v0: i128, v1: i8): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 10) +; (instruction range: 0 .. 13) ; Inst 0: lsl x4, x0, x2 ; Inst 1: lsl x3, x1, x2 ; Inst 2: orn w1, wzr, w2 @@ -1192,9 +1192,12 @@ block0(v0: i128, v1: i8): ; Inst 4: lsr x0, x0, x1 ; Inst 5: orr x0, x3, x0 ; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, x4, x0, ne -; Inst 8: csel x0, xzr, x4, ne -; Inst 9: ret +; Inst 7: csel x1, xzr, x4, ne +; Inst 8: csel x0, x4, x0, ne +; Inst 9: mov x2, x0 +; Inst 10: mov x0, x1 +; Inst 11: mov x1, x2 +; Inst 12: ret ; }} function %ishl_i128_i128(i128, i128) -> i128 { @@ -1207,7 +1210,7 @@ block0(v0: i128, v1: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 10) +; (instruction range: 0 .. 
13) ; Inst 0: lsl x3, x0, x2 ; Inst 1: lsl x1, x1, x2 ; Inst 2: orn w4, wzr, w2 @@ -1215,9 +1218,12 @@ block0(v0: i128, v1: i128): ; Inst 4: lsr x0, x0, x4 ; Inst 5: orr x0, x1, x0 ; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, x3, x0, ne -; Inst 8: csel x0, xzr, x3, ne -; Inst 9: ret +; Inst 7: csel x1, xzr, x3, ne +; Inst 8: csel x0, x3, x0, ne +; Inst 9: mov x2, x0 +; Inst 10: mov x0, x1 +; Inst 11: mov x1, x2 +; Inst 12: ret ; }} function %ushr_i128_i8(i128, i8) -> i128 { @@ -1230,17 +1236,20 @@ block0(v0: i128, v1: i8): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 10) +; (instruction range: 0 .. 13) ; Inst 0: lsr x3, x0, x2 ; Inst 1: lsr x0, x1, x2 ; Inst 2: orn w4, wzr, w2 ; Inst 3: lsl x1, x1, #1 ; Inst 4: lsl x1, x1, x4 -; Inst 5: orr x3, x3, x1 +; Inst 5: orr x1, x3, x1 ; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, xzr, x0, ne -; Inst 8: csel x0, x0, x3, ne -; Inst 9: ret +; Inst 7: csel x1, x0, x1, ne +; Inst 8: csel x0, xzr, x0, ne +; Inst 9: mov x2, x0 +; Inst 10: mov x0, x1 +; Inst 11: mov x1, x2 +; Inst 12: ret ; }} function %ushr_i128_i128(i128, i128) -> i128 { @@ -1253,17 +1262,20 @@ block0(v0: i128, v1: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 10) +; (instruction range: 0 .. 13) ; Inst 0: lsr x3, x0, x2 ; Inst 1: lsr x0, x1, x2 ; Inst 2: orn w4, wzr, w2 ; Inst 3: lsl x1, x1, #1 ; Inst 4: lsl x1, x1, x4 -; Inst 5: orr x3, x3, x1 +; Inst 5: orr x1, x3, x1 ; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, xzr, x0, ne -; Inst 8: csel x0, x0, x3, ne -; Inst 9: ret +; Inst 7: csel x1, x0, x1, ne +; Inst 8: csel x0, xzr, x0, ne +; Inst 9: mov x2, x0 +; Inst 10: mov x0, x1 +; Inst 11: mov x1, x2 +; Inst 12: ret ; }} function %sshr_i128_i8(i128, i8) -> i128 { @@ -1276,7 +1288,7 @@ block0(v0: i128, v1: i8): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 11) +; (instruction range: 0 .. 
13) ; Inst 0: lsr x3, x0, x2 ; Inst 1: asr x0, x1, x2 ; Inst 2: orn w4, wzr, w2 @@ -1285,9 +1297,11 @@ block0(v0: i128, v1: i8): ; Inst 5: asr x1, x1, #63 ; Inst 6: orr x3, x3, x4 ; Inst 7: ands xzr, x2, #64 -; Inst 8: csel x1, x1, x0, ne -; Inst 9: csel x0, x0, x3, ne -; Inst 10: ret +; Inst 8: csel x2, x0, x3, ne +; Inst 9: csel x0, x1, x0, ne +; Inst 10: mov x1, x0 +; Inst 11: mov x0, x2 +; Inst 12: ret ; }} function %sshr_i128_i128(i128, i128) -> i128 { @@ -1300,7 +1314,7 @@ block0(v0: i128, v1: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 11) +; (instruction range: 0 .. 13) ; Inst 0: lsr x3, x0, x2 ; Inst 1: asr x0, x1, x2 ; Inst 2: orn w4, wzr, w2 @@ -1309,8 +1323,10 @@ block0(v0: i128, v1: i128): ; Inst 5: asr x1, x1, #63 ; Inst 6: orr x3, x3, x4 ; Inst 7: ands xzr, x2, #64 -; Inst 8: csel x1, x1, x0, ne -; Inst 9: csel x0, x0, x3, ne -; Inst 10: ret +; Inst 8: csel x2, x0, x3, ne +; Inst 9: csel x0, x1, x0, ne +; Inst 10: mov x1, x0 +; Inst 11: mov x0, x2 +; Inst 12: ret ; }} diff --git a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif index 60eb2b157c..e4b602beb8 100644 --- a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif +++ b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif @@ -16,19 +16,19 @@ block0(v0: i128, v1: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 24) +; (instruction range: 0 .. 
25) ; Inst 0: mov x4, x1 ; Inst 1: orr x1, xzr, #128 ; Inst 2: sub x1, x1, x2 -; Inst 3: lsr x3, x0, x2 -; Inst 4: lsr x5, x4, x2 +; Inst 3: lsr x5, x0, x2 +; Inst 4: lsr x3, x4, x2 ; Inst 5: orn w6, wzr, w2 ; Inst 6: lsl x7, x4, #1 ; Inst 7: lsl x6, x7, x6 -; Inst 8: orr x6, x3, x6 +; Inst 8: orr x5, x5, x6 ; Inst 9: ands xzr, x2, #64 -; Inst 10: csel x3, xzr, x5, ne -; Inst 11: csel x2, x5, x6, ne +; Inst 10: csel x2, x3, x5, ne +; Inst 11: csel x3, xzr, x3, ne ; Inst 12: lsl x5, x0, x1 ; Inst 13: lsl x4, x4, x1 ; Inst 14: orn w6, wzr, w1 @@ -36,11 +36,12 @@ block0(v0: i128, v1: i128): ; Inst 16: lsr x0, x0, x6 ; Inst 17: orr x0, x4, x0 ; Inst 18: ands xzr, x1, #64 -; Inst 19: csel x1, x5, x0, ne -; Inst 20: csel x0, xzr, x5, ne -; Inst 21: orr x1, x3, x1 -; Inst 22: orr x0, x2, x0 -; Inst 23: ret +; Inst 19: csel x1, xzr, x5, ne +; Inst 20: csel x0, x5, x0, ne +; Inst 21: orr x3, x3, x0 +; Inst 22: orr x0, x2, x1 +; Inst 23: mov x1, x3 +; Inst 24: ret ; }} function %f0(i64, i64) -> i64 { @@ -125,7 +126,7 @@ block0(v0: i128, v1: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 27) +; (instruction range: 0 .. 
24) ; Inst 0: mov x4, x0 ; Inst 1: orr x0, xzr, #128 ; Inst 2: sub x0, x0, x2 @@ -136,8 +137,8 @@ block0(v0: i128, v1: i128): ; Inst 7: lsr x6, x7, x6 ; Inst 8: orr x5, x5, x6 ; Inst 9: ands xzr, x2, #64 -; Inst 10: csel x2, x3, x5, ne -; Inst 11: csel x3, xzr, x3, ne +; Inst 10: csel x2, xzr, x3, ne +; Inst 11: csel x3, x3, x5, ne ; Inst 12: lsr x5, x4, x0 ; Inst 13: lsr x4, x1, x0 ; Inst 14: orn w6, wzr, w0 @@ -145,14 +146,11 @@ block0(v0: i128, v1: i128): ; Inst 16: lsl x1, x1, x6 ; Inst 17: orr x1, x5, x1 ; Inst 18: ands xzr, x0, #64 -; Inst 19: csel x0, xzr, x4, ne -; Inst 20: csel x1, x4, x1, ne -; Inst 21: orr x1, x3, x1 -; Inst 22: orr x0, x2, x0 -; Inst 23: mov x2, x0 -; Inst 24: mov x0, x1 -; Inst 25: mov x1, x2 -; Inst 26: ret +; Inst 19: csel x0, x4, x1, ne +; Inst 20: csel x1, xzr, x4, ne +; Inst 21: orr x0, x2, x0 +; Inst 22: orr x1, x3, x1 +; Inst 23: ret ; }} function %f4(i64, i64) -> i64 { diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index 801f00a2f5..dbb67b4b59 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -43,7 +43,7 @@ block0(v0: f64, v1: i64): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 16) +; (instruction range: 0 .. 
17) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp ; Inst 2: movsd 0(%rdi), %xmm1 @@ -54,10 +54,12 @@ block0(v0: f64, v1: i64): ; Inst 7: andq $1, %rsi ; Inst 8: ucomisd %xmm0, %xmm1 ; Inst 9: movaps %xmm0, %xmm1 -; Inst 10: jz $check; movsd %xmm0, %xmm1; $check: jnp $next; movsd %xmm0, %xmm1; $next -; Inst 11: movq %rsi, %rax -; Inst 12: movaps %xmm1, %xmm0 -; Inst 13: movq %rbp, %rsp -; Inst 14: popq %rbp -; Inst 15: ret +; Inst 10: jz $next; movsd %xmm0, %xmm1; $next: +; Inst 11: jnp $next; movsd %xmm0, %xmm1; $next: +; Inst 12: movq %rsi, %rax +; Inst 13: movaps %xmm1, %xmm0 +; Inst 14: movq %rbp, %rsp +; Inst 15: popq %rbp +; Inst 16: ret ; }} + diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 20064dac82..201e20147d 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -600,57 +600,55 @@ block0(v0: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 50) +; (instruction range: 0 .. 
48) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rdx -; Inst 3: movq %rdi, %rsi -; Inst 4: shrq $1, %rsi -; Inst 5: movabsq $8608480567731124087, %rcx -; Inst 6: andq %rcx, %rsi -; Inst 7: movq %rdi, %rax -; Inst 8: subq %rsi, %rax -; Inst 9: shrq $1, %rsi -; Inst 10: andq %rcx, %rsi -; Inst 11: subq %rsi, %rax -; Inst 12: shrq $1, %rsi -; Inst 13: andq %rcx, %rsi -; Inst 14: subq %rsi, %rax -; Inst 15: movq %rax, %rsi -; Inst 16: shrq $4, %rsi -; Inst 17: addq %rax, %rsi -; Inst 18: movabsq $1085102592571150095, %rdi -; Inst 19: andq %rdi, %rsi -; Inst 20: movabsq $72340172838076673, %rdi -; Inst 21: imulq %rdi, %rsi -; Inst 22: shrq $56, %rsi -; Inst 23: movq %rdx, %rax -; Inst 24: shrq $1, %rax -; Inst 25: movabsq $8608480567731124087, %rcx -; Inst 26: andq %rcx, %rax -; Inst 27: movq %rdx, %rdi -; Inst 28: subq %rax, %rdi -; Inst 29: shrq $1, %rax -; Inst 30: andq %rcx, %rax -; Inst 31: subq %rax, %rdi -; Inst 32: shrq $1, %rax -; Inst 33: andq %rcx, %rax -; Inst 34: subq %rax, %rdi -; Inst 35: movq %rdi, %rax -; Inst 36: shrq $4, %rax -; Inst 37: addq %rdi, %rax -; Inst 38: movabsq $1085102592571150095, %rdi -; Inst 39: andq %rdi, %rax -; Inst 40: movabsq $72340172838076673, %rdi -; Inst 41: imulq %rdi, %rax -; Inst 42: shrq $56, %rax -; Inst 43: addq %rax, %rsi -; Inst 44: xorq %rdi, %rdi -; Inst 45: movq %rsi, %rax -; Inst 46: movq %rdi, %rdx -; Inst 47: movq %rbp, %rsp -; Inst 48: popq %rbp -; Inst 49: ret +; Inst 2: movq %rdi, %rax +; Inst 3: movq %rax, %rcx +; Inst 4: shrq $1, %rcx +; Inst 5: movabsq $8608480567731124087, %rdi +; Inst 6: andq %rdi, %rcx +; Inst 7: subq %rcx, %rax +; Inst 8: shrq $1, %rcx +; Inst 9: andq %rdi, %rcx +; Inst 10: subq %rcx, %rax +; Inst 11: shrq $1, %rcx +; Inst 12: andq %rdi, %rcx +; Inst 13: subq %rcx, %rax +; Inst 14: movq %rax, %rdi +; Inst 15: shrq $4, %rdi +; Inst 16: addq %rax, %rdi +; Inst 17: movabsq $1085102592571150095, %rax +; Inst 18: andq %rax, %rdi +; Inst 19: movabsq 
$72340172838076673, %rax +; Inst 20: imulq %rax, %rdi +; Inst 21: shrq $56, %rdi +; Inst 22: movq %rsi, %rcx +; Inst 23: shrq $1, %rcx +; Inst 24: movabsq $8608480567731124087, %rax +; Inst 25: andq %rax, %rcx +; Inst 26: subq %rcx, %rsi +; Inst 27: shrq $1, %rcx +; Inst 28: andq %rax, %rcx +; Inst 29: subq %rcx, %rsi +; Inst 30: shrq $1, %rcx +; Inst 31: andq %rax, %rcx +; Inst 32: subq %rcx, %rsi +; Inst 33: movq %rsi, %rax +; Inst 34: shrq $4, %rax +; Inst 35: addq %rsi, %rax +; Inst 36: movabsq $1085102592571150095, %rsi +; Inst 37: andq %rsi, %rax +; Inst 38: movabsq $72340172838076673, %rsi +; Inst 39: imulq %rsi, %rax +; Inst 40: shrq $56, %rax +; Inst 41: addq %rax, %rdi +; Inst 42: xorq %rsi, %rsi +; Inst 43: movq %rdi, %rax +; Inst 44: movq %rsi, %rdx +; Inst 45: movq %rbp, %rsp +; Inst 46: popq %rbp +; Inst 47: ret ; }} function %f20(i128) -> i128 { @@ -663,108 +661,97 @@ block0(v0: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 101) +; (instruction range: 0 .. 
90) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rcx -; Inst 3: movq %rcx, %rdi -; Inst 4: movabsq $6148914691236517205, %rax -; Inst 5: shrq $1, %rdi -; Inst 6: andq %rax, %rdi -; Inst 7: andq %rcx, %rax +; Inst 2: movq %rsi, %rcx +; Inst 3: movabsq $6148914691236517205, %rsi +; Inst 4: movq %rcx, %rax +; Inst 5: andq %rsi, %rax +; Inst 6: shrq $1, %rcx +; Inst 7: andq %rsi, %rcx ; Inst 8: shlq $1, %rax -; Inst 9: movq %rax, %rcx -; Inst 10: orq %rdi, %rcx -; Inst 11: movq %rcx, %rdi -; Inst 12: movabsq $3689348814741910323, %rax -; Inst 13: shrq $2, %rdi -; Inst 14: andq %rax, %rdi -; Inst 15: andq %rcx, %rax -; Inst 16: shlq $2, %rax -; Inst 17: movq %rax, %rcx -; Inst 18: orq %rdi, %rcx -; Inst 19: movq %rcx, %rdi -; Inst 20: movabsq $1085102592571150095, %rax -; Inst 21: shrq $4, %rdi -; Inst 22: andq %rax, %rdi -; Inst 23: andq %rcx, %rax -; Inst 24: shlq $4, %rax +; Inst 9: orq %rcx, %rax +; Inst 10: movabsq $3689348814741910323, %rsi +; Inst 11: movq %rax, %rcx +; Inst 12: andq %rsi, %rcx +; Inst 13: shrq $2, %rax +; Inst 14: andq %rsi, %rax +; Inst 15: shlq $2, %rcx +; Inst 16: orq %rax, %rcx +; Inst 17: movabsq $1085102592571150095, %rsi +; Inst 18: movq %rcx, %rax +; Inst 19: andq %rsi, %rax +; Inst 20: shrq $4, %rcx +; Inst 21: andq %rsi, %rcx +; Inst 22: shlq $4, %rax +; Inst 23: orq %rcx, %rax +; Inst 24: movabsq $71777214294589695, %rsi ; Inst 25: movq %rax, %rcx -; Inst 26: orq %rdi, %rcx -; Inst 27: movq %rcx, %rdi -; Inst 28: movabsq $71777214294589695, %rax -; Inst 29: shrq $8, %rdi -; Inst 30: andq %rax, %rdi -; Inst 31: andq %rcx, %rax -; Inst 32: shlq $8, %rax -; Inst 33: movq %rax, %rcx -; Inst 34: orq %rdi, %rcx -; Inst 35: movq %rcx, %rdi -; Inst 36: movabsq $281470681808895, %rax -; Inst 37: shrq $16, %rdi -; Inst 38: andq %rax, %rdi -; Inst 39: andq %rcx, %rax -; Inst 40: shlq $16, %rax -; Inst 41: orq %rdi, %rax -; Inst 42: movq %rax, %rcx -; Inst 43: movl $-1, %edi -; Inst 44: shrq $32, %rcx -; Inst 45: andq 
%rdi, %rcx -; Inst 46: andq %rax, %rdi -; Inst 47: shlq $32, %rdi -; Inst 48: orq %rcx, %rdi -; Inst 49: movq %rsi, %rcx -; Inst 50: movq %rcx, %rsi -; Inst 51: movabsq $6148914691236517205, %rax -; Inst 52: shrq $1, %rsi -; Inst 53: andq %rax, %rsi -; Inst 54: andq %rcx, %rax -; Inst 55: shlq $1, %rax -; Inst 56: movq %rax, %rcx -; Inst 57: orq %rsi, %rcx -; Inst 58: movq %rcx, %rsi -; Inst 59: movabsq $3689348814741910323, %rax -; Inst 60: shrq $2, %rsi -; Inst 61: andq %rax, %rsi -; Inst 62: andq %rcx, %rax -; Inst 63: shlq $2, %rax -; Inst 64: movq %rax, %rcx -; Inst 65: orq %rsi, %rcx -; Inst 66: movq %rcx, %rsi -; Inst 67: movabsq $1085102592571150095, %rax -; Inst 68: shrq $4, %rsi -; Inst 69: andq %rax, %rsi -; Inst 70: andq %rcx, %rax -; Inst 71: shlq $4, %rax -; Inst 72: movq %rax, %rcx -; Inst 73: orq %rsi, %rcx -; Inst 74: movq %rcx, %rsi -; Inst 75: movabsq $71777214294589695, %rax -; Inst 76: shrq $8, %rsi -; Inst 77: andq %rax, %rsi -; Inst 78: andq %rcx, %rax -; Inst 79: shlq $8, %rax -; Inst 80: movq %rax, %rcx -; Inst 81: orq %rsi, %rcx -; Inst 82: movq %rcx, %rsi -; Inst 83: movabsq $281470681808895, %rax -; Inst 84: shrq $16, %rsi -; Inst 85: andq %rax, %rsi -; Inst 86: andq %rcx, %rax -; Inst 87: shlq $16, %rax -; Inst 88: orq %rsi, %rax -; Inst 89: movq %rax, %rsi -; Inst 90: movl $-1, %ecx -; Inst 91: shrq $32, %rsi -; Inst 92: andq %rcx, %rsi -; Inst 93: andq %rax, %rcx -; Inst 94: shlq $32, %rcx -; Inst 95: orq %rsi, %rcx -; Inst 96: movq %rcx, %rax -; Inst 97: movq %rdi, %rdx -; Inst 98: movq %rbp, %rsp -; Inst 99: popq %rbp -; Inst 100: ret +; Inst 26: andq %rsi, %rcx +; Inst 27: shrq $8, %rax +; Inst 28: andq %rsi, %rax +; Inst 29: shlq $8, %rcx +; Inst 30: orq %rax, %rcx +; Inst 31: movabsq $281470681808895, %rsi +; Inst 32: movq %rcx, %rax +; Inst 33: andq %rsi, %rax +; Inst 34: shrq $16, %rcx +; Inst 35: andq %rsi, %rcx +; Inst 36: shlq $16, %rax +; Inst 37: orq %rcx, %rax +; Inst 38: movabsq $4294967295, %rcx +; Inst 39: movq %rax, 
%rsi +; Inst 40: andq %rcx, %rsi +; Inst 41: shrq $32, %rax +; Inst 42: shlq $32, %rsi +; Inst 43: orq %rax, %rsi +; Inst 44: movabsq $6148914691236517205, %rax +; Inst 45: movq %rdi, %rcx +; Inst 46: andq %rax, %rcx +; Inst 47: shrq $1, %rdi +; Inst 48: andq %rax, %rdi +; Inst 49: shlq $1, %rcx +; Inst 50: orq %rdi, %rcx +; Inst 51: movabsq $3689348814741910323, %rdi +; Inst 52: movq %rcx, %rax +; Inst 53: andq %rdi, %rax +; Inst 54: shrq $2, %rcx +; Inst 55: andq %rdi, %rcx +; Inst 56: shlq $2, %rax +; Inst 57: orq %rcx, %rax +; Inst 58: movabsq $1085102592571150095, %rdi +; Inst 59: movq %rax, %rcx +; Inst 60: andq %rdi, %rcx +; Inst 61: shrq $4, %rax +; Inst 62: andq %rdi, %rax +; Inst 63: shlq $4, %rcx +; Inst 64: orq %rax, %rcx +; Inst 65: movabsq $71777214294589695, %rdi +; Inst 66: movq %rcx, %rax +; Inst 67: andq %rdi, %rax +; Inst 68: shrq $8, %rcx +; Inst 69: andq %rdi, %rcx +; Inst 70: shlq $8, %rax +; Inst 71: orq %rcx, %rax +; Inst 72: movabsq $281470681808895, %rdi +; Inst 73: movq %rax, %rcx +; Inst 74: andq %rdi, %rcx +; Inst 75: shrq $16, %rax +; Inst 76: andq %rdi, %rax +; Inst 77: shlq $16, %rcx +; Inst 78: orq %rax, %rcx +; Inst 79: movabsq $4294967295, %rax +; Inst 80: movq %rcx, %rdi +; Inst 81: andq %rax, %rdi +; Inst 82: shrq $32, %rcx +; Inst 83: shlq $32, %rdi +; Inst 84: orq %rcx, %rdi +; Inst 85: movq %rsi, %rax +; Inst 86: movq %rdi, %rdx +; Inst 87: movq %rbp, %rsp +; Inst 88: popq %rbp +; Inst 89: ret ; }} function %f21(i128, i64) { @@ -1020,11 +1007,11 @@ block0(v0: i128): ; Inst 4: cmovzq %rcx, %rax ; Inst 5: movl $63, %esi ; Inst 6: subq %rax, %rsi -; Inst 7: movabsq $-1, %rcx -; Inst 8: bsrq %rdi, %rax -; Inst 9: cmovzq %rcx, %rax +; Inst 7: movabsq $-1, %rax +; Inst 8: bsrq %rdi, %rcx +; Inst 9: cmovzq %rax, %rcx ; Inst 10: movl $63, %edi -; Inst 11: subq %rax, %rdi +; Inst 11: subq %rcx, %rdi ; Inst 12: addq $64, %rdi ; Inst 13: cmpq $64, %rsi ; Inst 14: cmovnzq %rsi, %rdi @@ -1098,7 +1085,7 @@ block0(v0: i128, v1: i128): ; 
Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 25) +; (instruction range: 0 .. 24) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp ; Inst 2: movq %rdi, %rax @@ -1116,14 +1103,13 @@ block0(v0: i128, v1: i128): ; Inst 14: cmovzq %rcx, %rax ; Inst 15: orq %rdi, %rax ; Inst 16: testq $64, %rdx -; Inst 17: movq %rsi, %rdi -; Inst 18: cmovzq %rax, %rdi -; Inst 19: cmovzq %rsi, %rcx -; Inst 20: movq %rcx, %rax -; Inst 21: movq %rdi, %rdx -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret +; Inst 17: cmovzq %rsi, %rcx +; Inst 18: cmovzq %rax, %rsi +; Inst 19: movq %rcx, %rax +; Inst 20: movq %rsi, %rdx +; Inst 21: movq %rbp, %rsp +; Inst 22: popq %rbp +; Inst 23: ret ; }} function %f31(i128, i128) -> i128 { @@ -1136,7 +1122,7 @@ block0(v0: i128, v1: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 24) +; (instruction range: 0 .. 25) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp ; Inst 2: movq %rsi, %rax @@ -1152,15 +1138,16 @@ block0(v0: i128, v1: i128): ; Inst 12: testq $127, %rdx ; Inst 13: cmovzq %rcx, %rax ; Inst 14: orq %rdi, %rax -; Inst 15: xorq %rdi, %rdi +; Inst 15: xorq %rcx, %rcx ; Inst 16: testq $64, %rdx -; Inst 17: cmovzq %rsi, %rdi -; Inst 18: cmovzq %rax, %rsi -; Inst 19: movq %rsi, %rax -; Inst 20: movq %rdi, %rdx -; Inst 21: movq %rbp, %rsp -; Inst 22: popq %rbp -; Inst 23: ret +; Inst 17: movq %rsi, %rdi +; Inst 18: cmovzq %rax, %rdi +; Inst 19: cmovzq %rsi, %rcx +; Inst 20: movq %rdi, %rax +; Inst 21: movq %rcx, %rdx +; Inst 22: movq %rbp, %rsp +; Inst 23: popq %rbp +; Inst 24: ret ; }} function %f32(i128, i128) -> i128 { @@ -1173,7 +1160,7 @@ block0(v0: i128, v1: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 25) +; (instruction range: 0 .. 
26) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp ; Inst 2: movq %rdi, %rax @@ -1192,13 +1179,14 @@ block0(v0: i128, v1: i128): ; Inst 15: orq %r8, %rax ; Inst 16: sarq $63, %rsi ; Inst 17: testq $64, %rdx -; Inst 18: cmovzq %rdi, %rsi -; Inst 19: cmovzq %rax, %rdi -; Inst 20: movq %rdi, %rax -; Inst 21: movq %rsi, %rdx -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret +; Inst 18: movq %rdi, %rcx +; Inst 19: cmovzq %rax, %rcx +; Inst 20: cmovzq %rdi, %rsi +; Inst 21: movq %rcx, %rax +; Inst 22: movq %rsi, %rdx +; Inst 23: movq %rbp, %rsp +; Inst 24: popq %rbp +; Inst 25: ret ; }} function %f33(i128, i128) -> i128 { @@ -1211,27 +1199,27 @@ block0(v0: i128, v1: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 46) +; (instruction range: 0 .. 48) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %r9 +; Inst 2: movq %rdi, %rax ; Inst 3: movq %rdx, %rcx -; Inst 4: shlq %cl, %r9 -; Inst 5: movq %rsi, %rax +; Inst 4: shlq %cl, %rax +; Inst 5: movq %rsi, %r8 ; Inst 6: movq %rdx, %rcx -; Inst 7: shlq %cl, %rax +; Inst 7: shlq %cl, %r8 ; Inst 8: movl $64, %ecx ; Inst 9: subq %rdx, %rcx -; Inst 10: movq %rdi, %r10 -; Inst 11: shrq %cl, %r10 -; Inst 12: xorq %r8, %r8 +; Inst 10: movq %rdi, %r9 +; Inst 11: shrq %cl, %r9 +; Inst 12: xorq %rcx, %rcx ; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %r8, %r10 -; Inst 15: orq %rax, %r10 +; Inst 14: cmovzq %rcx, %r9 +; Inst 15: orq %r8, %r9 ; Inst 16: testq $64, %rdx -; Inst 17: movq %r9, %rax -; Inst 18: cmovzq %r10, %rax -; Inst 19: cmovzq %r9, %r8 +; Inst 17: movq %rcx, %r8 +; Inst 18: cmovzq %rax, %r8 +; Inst 19: cmovzq %r9, %rax ; Inst 20: movl $128, %r9d ; Inst 21: subq %rdx, %r9 ; Inst 22: movq %rdi, %rdx @@ -1247,17 +1235,19 @@ block0(v0: i128, v1: i128): ; Inst 32: testq $127, %r9 ; Inst 33: cmovzq %rcx, %rsi ; Inst 34: orq %rdx, %rsi -; Inst 35: xorq %rcx, %rcx +; Inst 35: xorq %rdx, %rdx ; Inst 36: testq $64, %r9 -; Inst 37: cmovzq %rdi, %rcx -; 
Inst 38: cmovzq %rsi, %rdi -; Inst 39: orq %rdi, %r8 -; Inst 40: orq %rcx, %rax -; Inst 41: movq %rax, %rdx -; Inst 42: movq %r8, %rax -; Inst 43: movq %rbp, %rsp -; Inst 44: popq %rbp -; Inst 45: ret +; Inst 37: movq %rdi, %rcx +; Inst 38: cmovzq %rsi, %rcx +; Inst 39: movq %rdx, %rsi +; Inst 40: cmovzq %rdi, %rsi +; Inst 41: orq %rcx, %r8 +; Inst 42: orq %rsi, %rax +; Inst 43: movq %rax, %rdx +; Inst 44: movq %r8, %rax +; Inst 45: movq %rbp, %rsp +; Inst 46: popq %rbp +; Inst 47: ret ; }} function %f34(i128, i128) -> i128 { @@ -1270,52 +1260,51 @@ block0(v0: i128, v1: i128): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 46) +; (instruction range: 0 .. 45) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp ; Inst 2: movq %rdi, %rax ; Inst 3: movq %rdx, %rcx ; Inst 4: shrq %cl, %rax -; Inst 5: movq %rsi, %r8 +; Inst 5: movq %rsi, %r9 ; Inst 6: movq %rdx, %rcx -; Inst 7: shrq %cl, %r8 +; Inst 7: shrq %cl, %r9 ; Inst 8: movl $64, %ecx ; Inst 9: subq %rdx, %rcx -; Inst 10: movq %rsi, %r9 -; Inst 11: shlq %cl, %r9 +; Inst 10: movq %rsi, %r8 +; Inst 11: shlq %cl, %r8 ; Inst 12: xorq %rcx, %rcx ; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %r9 -; Inst 15: movq %r9, %rcx -; Inst 16: orq %rax, %rcx -; Inst 17: xorq %rax, %rax -; Inst 18: testq $64, %rdx +; Inst 14: cmovzq %rcx, %r8 +; Inst 15: orq %rax, %r8 +; Inst 16: xorq %rcx, %rcx +; Inst 17: testq $64, %rdx +; Inst 18: movq %r9, %rax ; Inst 19: cmovzq %r8, %rax -; Inst 20: cmovzq %rcx, %r8 -; Inst 21: movl $128, %r9d -; Inst 22: subq %rdx, %r9 -; Inst 23: movq %rdi, %rdx -; Inst 24: movq %r9, %rcx -; Inst 25: shlq %cl, %rdx -; Inst 26: movq %r9, %rcx -; Inst 27: shlq %cl, %rsi -; Inst 28: movl $64, %ecx -; Inst 29: subq %r9, %rcx -; Inst 30: shrq %cl, %rdi -; Inst 31: xorq %rcx, %rcx -; Inst 32: testq $127, %r9 -; Inst 33: cmovzq %rcx, %rdi -; Inst 34: orq %rsi, %rdi -; Inst 35: testq $64, %r9 -; Inst 36: movq %rdx, %rsi -; Inst 37: cmovzq %rdi, %rsi -; Inst 38: cmovzq %rdx, 
%rcx -; Inst 39: orq %rcx, %r8 -; Inst 40: orq %rsi, %rax -; Inst 41: movq %rax, %rdx -; Inst 42: movq %r8, %rax -; Inst 43: movq %rbp, %rsp -; Inst 44: popq %rbp -; Inst 45: ret +; Inst 20: movq %rcx, %r8 +; Inst 21: cmovzq %r9, %r8 +; Inst 22: movl $128, %r9d +; Inst 23: subq %rdx, %r9 +; Inst 24: movq %rdi, %rdx +; Inst 25: movq %r9, %rcx +; Inst 26: shlq %cl, %rdx +; Inst 27: movq %r9, %rcx +; Inst 28: shlq %cl, %rsi +; Inst 29: movl $64, %ecx +; Inst 30: subq %r9, %rcx +; Inst 31: shrq %cl, %rdi +; Inst 32: xorq %rcx, %rcx +; Inst 33: testq $127, %r9 +; Inst 34: cmovzq %rcx, %rdi +; Inst 35: orq %rsi, %rdi +; Inst 36: testq $64, %r9 +; Inst 37: cmovzq %rdx, %rcx +; Inst 38: cmovzq %rdi, %rdx +; Inst 39: orq %rcx, %rax +; Inst 40: orq %rdx, %r8 +; Inst 41: movq %r8, %rdx +; Inst 42: movq %rbp, %rsp +; Inst 43: popq %rbp +; Inst 44: ret ; }} diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index 8f03f27951..549fa9c4bf 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -14,17 +14,17 @@ block0(v0: i64): ; (instruction range: 0 .. 
25) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: shrq $1, %rsi -; Inst 4: movabsq $8608480567731124087, %rax -; Inst 5: andq %rax, %rsi -; Inst 6: subq %rsi, %rdi -; Inst 7: shrq $1, %rsi -; Inst 8: andq %rax, %rsi -; Inst 9: subq %rsi, %rdi -; Inst 10: shrq $1, %rsi -; Inst 11: andq %rax, %rsi -; Inst 12: subq %rsi, %rdi +; Inst 2: movq %rdi, %rax +; Inst 3: shrq $1, %rax +; Inst 4: movabsq $8608480567731124087, %rsi +; Inst 5: andq %rsi, %rax +; Inst 6: subq %rax, %rdi +; Inst 7: shrq $1, %rax +; Inst 8: andq %rsi, %rax +; Inst 9: subq %rax, %rdi +; Inst 10: shrq $1, %rax +; Inst 11: andq %rsi, %rax +; Inst 12: subq %rax, %rdi ; Inst 13: movq %rdi, %rsi ; Inst 14: shrq $4, %rsi ; Inst 15: addq %rdi, %rsi @@ -54,17 +54,17 @@ block0(v0: i64): ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp ; Inst 2: movq 0(%rdi), %rdi -; Inst 3: movq %rdi, %rsi -; Inst 4: shrq $1, %rsi -; Inst 5: movabsq $8608480567731124087, %rax -; Inst 6: andq %rax, %rsi -; Inst 7: subq %rsi, %rdi -; Inst 8: shrq $1, %rsi -; Inst 9: andq %rax, %rsi -; Inst 10: subq %rsi, %rdi -; Inst 11: shrq $1, %rsi -; Inst 12: andq %rax, %rsi -; Inst 13: subq %rsi, %rdi +; Inst 3: movq %rdi, %rax +; Inst 4: shrq $1, %rax +; Inst 5: movabsq $8608480567731124087, %rsi +; Inst 6: andq %rsi, %rax +; Inst 7: subq %rax, %rdi +; Inst 8: shrq $1, %rax +; Inst 9: andq %rsi, %rax +; Inst 10: subq %rax, %rdi +; Inst 11: shrq $1, %rax +; Inst 12: andq %rsi, %rax +; Inst 13: subq %rax, %rdi ; Inst 14: movq %rdi, %rsi ; Inst 15: shrq $4, %rsi ; Inst 16: addq %rdi, %rsi @@ -89,29 +89,30 @@ block0(v0: i32): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 22) +; (instruction range: 0 .. 
23) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: shrl $1, %esi -; Inst 4: andl $2004318071, %esi -; Inst 5: subl %esi, %edi -; Inst 6: shrl $1, %esi -; Inst 7: andl $2004318071, %esi -; Inst 8: subl %esi, %edi -; Inst 9: shrl $1, %esi -; Inst 10: andl $2004318071, %esi -; Inst 11: subl %esi, %edi -; Inst 12: movq %rdi, %rsi -; Inst 13: shrl $4, %esi -; Inst 14: addl %edi, %esi -; Inst 15: andl $252645135, %esi -; Inst 16: imull $16843009, %esi -; Inst 17: shrl $24, %esi -; Inst 18: movq %rsi, %rax -; Inst 19: movq %rbp, %rsp -; Inst 20: popq %rbp -; Inst 21: ret +; Inst 2: movq %rdi, %rax +; Inst 3: shrl $1, %eax +; Inst 4: movl $2004318071, %esi +; Inst 5: andl %esi, %eax +; Inst 6: subl %eax, %edi +; Inst 7: shrl $1, %eax +; Inst 8: andl %esi, %eax +; Inst 9: subl %eax, %edi +; Inst 10: shrl $1, %eax +; Inst 11: andl %esi, %eax +; Inst 12: subl %eax, %edi +; Inst 13: movq %rdi, %rsi +; Inst 14: shrl $4, %esi +; Inst 15: addl %edi, %esi +; Inst 16: andl $252645135, %esi +; Inst 17: imull $16843009, %esi +; Inst 18: shrl $24, %esi +; Inst 19: movq %rsi, %rax +; Inst 20: movq %rbp, %rsp +; Inst 21: popq %rbp +; Inst 22: ret ; }} function %popcnt32load(i64) -> i32 { @@ -125,29 +126,30 @@ block0(v0: i64): ; Entry block: 0 ; Block 0: ; (original IR block: block0) -; (instruction range: 0 .. 23) +; (instruction range: 0 .. 
24) ; Inst 0: pushq %rbp ; Inst 1: movq %rsp, %rbp ; Inst 2: movl 0(%rdi), %edi -; Inst 3: movq %rdi, %rsi -; Inst 4: shrl $1, %esi -; Inst 5: andl $2004318071, %esi -; Inst 6: subl %esi, %edi -; Inst 7: shrl $1, %esi -; Inst 8: andl $2004318071, %esi -; Inst 9: subl %esi, %edi -; Inst 10: shrl $1, %esi -; Inst 11: andl $2004318071, %esi -; Inst 12: subl %esi, %edi -; Inst 13: movq %rdi, %rsi -; Inst 14: shrl $4, %esi -; Inst 15: addl %edi, %esi -; Inst 16: andl $252645135, %esi -; Inst 17: imull $16843009, %esi -; Inst 18: shrl $24, %esi -; Inst 19: movq %rsi, %rax -; Inst 20: movq %rbp, %rsp -; Inst 21: popq %rbp -; Inst 22: ret +; Inst 3: movq %rdi, %rax +; Inst 4: shrl $1, %eax +; Inst 5: movl $2004318071, %esi +; Inst 6: andl %esi, %eax +; Inst 7: subl %eax, %edi +; Inst 8: shrl $1, %eax +; Inst 9: andl %esi, %eax +; Inst 10: subl %eax, %edi +; Inst 11: shrl $1, %eax +; Inst 12: andl %esi, %eax +; Inst 13: subl %eax, %edi +; Inst 14: movq %rdi, %rsi +; Inst 15: shrl $4, %esi +; Inst 16: addl %edi, %esi +; Inst 17: andl $252645135, %esi +; Inst 18: imull $16843009, %esi +; Inst 19: shrl $24, %esi +; Inst 20: movq %rsi, %rax +; Inst 21: movq %rbp, %rsp +; Inst 22: popq %rbp +; Inst 23: ret ; }}