diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index ba99c03b10..12198d8db0 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1455,6 +1455,38 @@ (_ Unit (emit (MInst.VecMisc op dst src size)))) (writable_reg_to_reg dst))) +;; Helper for emitting `MInst.VecRRRLong` instructions. +(decl vec_rrr_long (VecRRRLongOp Reg Reg bool) Reg) +(rule (vec_rrr_long op src1 src2 high_half) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRRLong op dst src1 src2 high_half)))) + (writable_reg_to_reg dst))) + +;; Helper for emitting `MInst.VecRRRLong` instructions, but for variants +;; where the operation both reads and modifies the destination register. +;; +;; Currently this is only used for `VecRRRLongOp.Umlal*` +(decl vec_rrrr_long (VecRRRLongOp Reg Reg Reg bool) Reg) +(rule (vec_rrrr_long op src1 src2 src3 high_half) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_1 Unit (emit (MInst.FpuMove128 dst src1))) + (_2 Unit (emit (MInst.VecRRRLong op dst src2 src3 high_half)))) + (writable_reg_to_reg dst))) + +;; Helper for emitting `MInst.VecRRNarrow` instructions. +(decl vec_rr_narrow (VecRRNarrowOp Reg bool) Reg) +(rule (vec_rr_narrow op src high_half) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRNarrow op dst src high_half)))) + (writable_reg_to_reg dst))) + +;; Helper for emitting `MInst.VecRRLong` instructions. +(decl vec_rr_long (VecRRLongOp Reg bool) Reg) +(rule (vec_rr_long op src high_half) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecRRLong op dst src high_half)))) + (writable_reg_to_reg dst))) + ;; Immediate value helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl imm (Type u64) Reg) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index de548231b1..5dce2a3741 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -189,3 +189,183 @@ ;; vectors. (rule (lower (has_type (vec128 ty) (ineg x))) (value_reg (vec_misc (VecMisc2.Neg) (put_in_reg x) (vector_size ty)))) + +;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; `i64` and smaller. +(rule (lower (has_type (fits_in_64 ty) (imul x y))) + (value_reg (alu_rrrr (madd_op ty) (put_in_reg x) (put_in_reg y) (zero_reg)))) + +;; `i128`. +(rule (lower (has_type $I128 (imul x y))) + (let ( + ;; Get the high/low registers for `x`. + (x_regs ValueRegs (put_in_regs x)) + (x_lo Reg (value_regs_get x_regs 0)) + (x_hi Reg (value_regs_get x_regs 1)) + + ;; Get the high/low registers for `y`. + (y_regs ValueRegs (put_in_regs y)) + (y_lo Reg (value_regs_get y_regs 0)) + (y_hi Reg (value_regs_get y_regs 1)) + + ;; 128bit mul formula: + ;; dst_lo = x_lo * y_lo + ;; dst_hi = umulhi(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo) + ;; + ;; We can convert the above formula into the following + ;; umulh dst_hi, x_lo, y_lo + ;; madd dst_hi, x_lo, y_hi, dst_hi + ;; madd dst_hi, x_hi, y_lo, dst_hi + ;; madd dst_lo, x_lo, y_lo, zero + (dst_hi1 Reg (alu_rrr (ALUOp.UMulH) x_lo y_lo)) + (dst_hi2 Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_hi dst_hi1)) + (dst_hi Reg (alu_rrrr (ALUOp3.MAdd64) x_hi y_lo dst_hi2)) + (dst_lo Reg (alu_rrrr (ALUOp3.MAdd64) x_lo y_lo (zero_reg))) + ) + (value_regs dst_lo dst_hi))) + +;; Case for i8x16, i16x8, and i32x4. 
+(rule (lower (has_type (vec128 ty @ (not_i64x2)) (imul x y))) + (value_reg (vec_rrr (VecALUOp.Mul) (put_in_reg x) (put_in_reg y) (vector_size ty)))) + +;; Special lowering for i64x2. +;; +;; This I64X2 multiplication is performed with several 32-bit +;; operations. +;; +;; 64-bit numbers x and y, can be represented as: +;; x = a + 2^32(b) +;; y = c + 2^32(d) +;; +;; A 64-bit multiplication is: +;; x * y = ac + 2^32(ad + bc) + 2^64(bd) +;; note: `2^64(bd)` can be ignored, the value is too large to fit in +;; 64 bits. +;; +;; This sequence implements a I64X2 multiply, where the registers +;; `rn` and `rm` are split up into 32-bit components: +;; rn = |d|c|b|a| +;; rm = |h|g|f|e| +;; +;; rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)| +;; +;; The sequence is: +;; rev64 rd.4s, rm.4s +;; mul rd.4s, rd.4s, rn.4s +;; xtn tmp1.2s, rn.2d +;; addp rd.4s, rd.4s, rd.4s +;; xtn tmp2.2s, rm.2d +;; shll rd.2d, rd.2s, #32 +;; umlal rd.2d, tmp2.2s, tmp1.2s +(rule (lower (has_type $I64X2 (imul x y))) + (let ( + (rn Reg (put_in_reg x)) + (rm Reg (put_in_reg y)) + ;; Reverse the 32-bit elements in the 64-bit words. + ;; rd = |g|h|e|f| + (rev Reg (vec_misc (VecMisc2.Rev64) rm (VectorSize.Size32x4))) + + ;; Calculate the high half components. + ;; rd = |dg|ch|be|af| + ;; + ;; Note that this 32-bit multiply of the high half + ;; discards the bits that would overflow, same as + ;; if 64-bit operations were used. Also the Shll + ;; below would shift out the overflow bits anyway. + (mul Reg (vec_rrr (VecALUOp.Mul) rev rn (VectorSize.Size32x4))) + + ;; Extract the low half components of rn. + ;; tmp1 = |c|a| + (tmp1 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rn $false)) + + ;; Sum the respective high half components. + ;; rd = |dg+ch|be+af||dg+ch|be+af| + (sum Reg (vec_rrr (VecALUOp.Addp) mul mul (VectorSize.Size32x4))) + + ;; Extract the low half components of rm. + ;; tmp2 = |g|e| + (tmp2 Reg (vec_rr_narrow (VecRRNarrowOp.Xtn64) rm $false)) + + ;; Shift the high half components, into the high half. + ;; rd = |dg+ch << 32|be+af << 32| + (shift Reg (vec_rr_long (VecRRLongOp.Shll32) sum $false)) + + ;; Multiply the low components together, and accumulate with the high + ;; half. + ;; rd = |rd[1] + cg|rd[0] + ae| + (result Reg (vec_rrrr_long (VecRRRLongOp.Umlal32) shift tmp2 tmp1 $false)) + ) + (value_reg result))) + +;; Special case for `i16x8.extmul_low_i8x16_s`. +(rule (lower (has_type $I16X8 + (imul (def_inst (swiden_low x @ (value_type $I8X16))) + (def_inst (swiden_low y @ (value_type $I8X16)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $false))) + +;; Special case for `i16x8.extmul_high_i8x16_s`. +(rule (lower (has_type $I16X8 + (imul (def_inst (swiden_high x @ (value_type $I8X16))) + (def_inst (swiden_high y @ (value_type $I8X16)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Smull8) (put_in_reg x) (put_in_reg y) $true))) + +;; Special case for `i16x8.extmul_low_i8x16_u`. +(rule (lower (has_type $I16X8 + (imul (def_inst (uwiden_low x @ (value_type $I8X16))) + (def_inst (uwiden_low y @ (value_type $I8X16)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $false))) + +;; Special case for `i16x8.extmul_high_i8x16_u`. +(rule (lower (has_type $I16X8 + (imul (def_inst (uwiden_high x @ (value_type $I8X16))) + (def_inst (uwiden_high y @ (value_type $I8X16)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Umull8) (put_in_reg x) (put_in_reg y) $true))) + +;; Special case for `i32x4.extmul_low_i16x8_s`. 
+(rule (lower (has_type $I32X4 + (imul (def_inst (swiden_low x @ (value_type $I16X8))) + (def_inst (swiden_low y @ (value_type $I16X8)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $false))) + +;; Special case for `i32x4.extmul_high_i16x8_s`. +(rule (lower (has_type $I32X4 + (imul (def_inst (swiden_high x @ (value_type $I16X8))) + (def_inst (swiden_high y @ (value_type $I16X8)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Smull16) (put_in_reg x) (put_in_reg y) $true))) + +;; Special case for `i32x4.extmul_low_i16x8_u`. +(rule (lower (has_type $I32X4 + (imul (def_inst (uwiden_low x @ (value_type $I16X8))) + (def_inst (uwiden_low y @ (value_type $I16X8)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $false))) + +;; Special case for `i32x4.extmul_high_i16x8_u`. +(rule (lower (has_type $I32X4 + (imul (def_inst (uwiden_high x @ (value_type $I16X8))) + (def_inst (uwiden_high y @ (value_type $I16X8)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Umull16) (put_in_reg x) (put_in_reg y) $true))) + +;; Special case for `i64x2.extmul_low_i32x4_s`. +(rule (lower (has_type $I64X2 + (imul (def_inst (swiden_low x @ (value_type $I32X4))) + (def_inst (swiden_low y @ (value_type $I32X4)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $false))) + +;; Special case for `i64x2.extmul_high_i32x4_s`. +(rule (lower (has_type $I64X2 + (imul (def_inst (swiden_high x @ (value_type $I32X4))) + (def_inst (swiden_high y @ (value_type $I32X4)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Smull32) (put_in_reg x) (put_in_reg y) $true))) + +;; Special case for `i64x2.extmul_low_i32x4_u`. +(rule (lower (has_type $I64X2 + (imul (def_inst (uwiden_low x @ (value_type $I32X4))) + (def_inst (uwiden_low y @ (value_type $I32X4)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $false))) + +;; Special case for `i64x2.extmul_high_i32x4_u`. +(rule (lower (has_type $I64X2 + (imul (def_inst (uwiden_high x @ (value_type $I32X4))) + (def_inst (uwiden_high y @ (value_type $I32X4)))))) + (value_reg (vec_rrr_long (VecRRRLongOp.Umull32) (put_in_reg x) (put_in_reg y) $true))) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 7d6a10b537..ac185daa2d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1285,153 +1285,6 @@ pub(crate) fn maybe_input_insn_via_conv>( None } -/// Pattern match an extending vector multiplication. -/// Returns a tuple of the opcode to use, the two input registers and whether -/// it's the 'high half' version of the instruction. 
-pub(crate) fn match_vec_long_mul>( - c: &mut C, - insn: IRInst, - ext_op: Opcode, -) -> Option<(VecRRRLongOp, regalloc::Reg, regalloc::Reg, bool)> { - let inputs = insn_inputs(c, insn); - if let Some(lhs) = maybe_input_insn(c, inputs[0], ext_op) { - if let Some(rhs) = maybe_input_insn(c, inputs[1], ext_op) { - let lhs_input = insn_inputs(c, lhs)[0]; - let rhs_input = insn_inputs(c, rhs)[0]; - let rn = put_input_in_reg(c, lhs_input, NarrowValueMode::None); - let rm = put_input_in_reg(c, rhs_input, NarrowValueMode::None); - let lane_type = c.output_ty(insn, 0).lane_type(); - match (lane_type, ext_op) { - (I16, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull8, rn, rm, false)), - (I16, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull8, rn, rm, true)), - (I16, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull8, rn, rm, false)), - (I16, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull8, rn, rm, true)), - (I32, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull16, rn, rm, false)), - (I32, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull16, rn, rm, true)), - (I32, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull16, rn, rm, false)), - (I32, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull16, rn, rm, true)), - (I64, Opcode::SwidenLow) => return Some((VecRRRLongOp::Smull32, rn, rm, false)), - (I64, Opcode::SwidenHigh) => return Some((VecRRRLongOp::Smull32, rn, rm, true)), - (I64, Opcode::UwidenLow) => return Some((VecRRRLongOp::Umull32, rn, rm, false)), - (I64, Opcode::UwidenHigh) => return Some((VecRRRLongOp::Umull32, rn, rm, true)), - _ => {} - }; - } - } - None -} - -pub(crate) fn lower_i64x2_mul>(c: &mut C, insn: IRInst) { - let inputs = insn_inputs(c, insn); - let outputs = insn_outputs(c, insn); - let rd = get_output_reg(c, outputs[0]).regs()[0]; - let rn = put_input_in_regs(c, inputs[0]).regs()[0]; - let rm = put_input_in_regs(c, inputs[1]).regs()[0]; - - let tmp1 = c.alloc_tmp(I64X2).only_reg().unwrap(); - let tmp2 = c.alloc_tmp(I64X2).only_reg().unwrap(); - - // This I64X2 multiplication is performed with several 32-bit - // operations. - - // 64-bit numbers x and y, can be represented as: - // x = a + 2^32(b) - // y = c + 2^32(d) - - // A 64-bit multiplication is: - // x * y = ac + 2^32(ad + bc) + 2^64(bd) - // note: `2^64(bd)` can be ignored, the value is too large to fit in - // 64 bits. - - // This sequence implements a I64X2 multiply, where the registers - // `rn` and `rm` are split up into 32-bit components: - // rn = |d|c|b|a| - // rm = |h|g|f|e| - // - // rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)| - // - // The sequence is: - // rev64 rd.4s, rm.4s - // mul rd.4s, rd.4s, rn.4s - // xtn tmp1.2s, rn.2d - // addp rd.4s, rd.4s, rd.4s - // xtn tmp2.2s, rm.2d - // shll rd.2d, rd.2s, #32 - // umlal rd.2d, tmp2.2s, tmp1.2s - - // Reverse the 32-bit elements in the 64-bit words. - // rd = |g|h|e|f| - c.emit(Inst::VecMisc { - op: VecMisc2::Rev64, - rd, - rn: rm, - size: VectorSize::Size32x4, - }); - - // Calculate the high half components. - // rd = |dg|ch|be|af| - // - // Note that this 32-bit multiply of the high half - // discards the bits that would overflow, same as - // if 64-bit operations were used. Also the Shll - // below would shift out the overflow bits anyway. - c.emit(Inst::VecRRR { - alu_op: VecALUOp::Mul, - rd, - rn: rd.to_reg(), - rm: rn, - size: VectorSize::Size32x4, - }); - - // Extract the low half components of rn. 
- // tmp1 = |c|a| - c.emit(Inst::VecRRNarrow { - op: VecRRNarrowOp::Xtn64, - rd: tmp1, - rn, - high_half: false, - }); - - // Sum the respective high half components. - // rd = |dg+ch|be+af||dg+ch|be+af| - c.emit(Inst::VecRRR { - alu_op: VecALUOp::Addp, - rd, - rn: rd.to_reg(), - rm: rd.to_reg(), - size: VectorSize::Size32x4, - }); - - // Extract the low half components of rm. - // tmp2 = |g|e| - c.emit(Inst::VecRRNarrow { - op: VecRRNarrowOp::Xtn64, - rd: tmp2, - rn: rm, - high_half: false, - }); - - // Shift the high half components, into the high half. - // rd = |dg+ch << 32|be+af << 32| - c.emit(Inst::VecRRLong { - op: VecRRLongOp::Shll32, - rd, - rn: rd.to_reg(), - high_half: false, - }); - - // Multiply the low components together, and accumulate with the high - // half. - // rd = |rd[1] + cg|rd[0] + ae| - c.emit(Inst::VecRRRLong { - alu_op: VecRRRLongOp::Umlal32, - rd, - rn: tmp2.to_reg(), - rm: tmp1.to_reg(), - high_half: false, - }); -} - /// Specifies what [lower_icmp] should do when lowering #[derive(Debug, Clone, PartialEq)] pub(crate) enum IcmpOutput { diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest index c976fb5a4b..39e1f0fc04 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb -src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823 -src/isa/aarch64/inst.isle c90a42ae8e0d932d200c6150777fa6a8b6d113f2e9ef24a9328669d9d9bebf137004e70eaef91b9be1880eb71e5b1cb28f84d53e2a11c0c45db3c57f5c32441e -src/isa/aarch64/lower.isle 5b9b2423ff641cb9bc3b297a0fba87813421200de7b83c8d575e52e643439971fb912be8d41043ecbe65107678451a74dfec0012df13dfca34bbfed4857504af +src/prelude.isle fc3ca134da0df8e7309db0f6969c8f1db85ca7b7590d2e43552ef3134b9a55bd358a93e3aadf79d5c31d3fc95ce5c9c52f8313183c688259c027ee494913869c +src/isa/aarch64/inst.isle 30c88514c23dfda849aa4a98b981b52b569994cdf3424a93d77429246ebce8c45575a76387ae2f3e4901ba6b21c846a419231da413f2df6c5dcea681eab6bf0c +src/isa/aarch64/lower.isle 97392236095b99e93c97732b2af0778aba409f81da22b6879cf6e8f2513f0de5b3017bfa072dc60e7f6bf21aac91a5153133c01b041fb174fab0680d2fd4886c diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs index 816306b477..682d2a98e2 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs @@ -34,6 +34,7 @@ pub trait Context { fn fits_in_32(&mut self, arg0: Type) -> Option; fn fits_in_64(&mut self, arg0: Type) -> Option; fn vec128(&mut self, arg0: Type) -> Option; + fn not_i64x2(&mut self, arg0: Type) -> Option<()>; fn value_list_slice(&mut self, arg0: ValueList) -> ValueSlice; fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice); fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice); @@ -63,13 +64,13 @@ pub trait Context { fn load_constant64_full(&mut self, arg0: u64) -> Reg; } -/// Internal type ProducesFlags: defined at src/prelude.isle line 234. +/// Internal type ProducesFlags: defined at src/prelude.isle line 238. 
#[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlags { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 237. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 241. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlags { inst: MInst, result: Reg }, @@ -983,7 +984,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 247. + // Rule at src/prelude.isle line 251. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -1011,7 +1012,7 @@ pub fn constructor_with_flags_1( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 255. + // Rule at src/prelude.isle line 259. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); return Some(pattern3_1); @@ -1045,7 +1046,7 @@ pub fn constructor_with_flags_2( result: pattern5_1, } = pattern4_0 { - // Rule at src/prelude.isle line 265. + // Rule at src/prelude.isle line 269. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); let expr2_0 = C::emit(ctx, &pattern5_0); @@ -1452,31 +1453,140 @@ pub fn constructor_vec_misc( return Some(expr4_0); } +// Generated as internal constructor for term vec_rrr_long. +pub fn constructor_vec_rrr_long( + ctx: &mut C, + arg0: &VecRRRLongOp, + arg1: Reg, + arg2: Reg, + arg3: bool, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + let pattern3_0 = arg3; + // Rule at src/isa/aarch64/inst.isle line 1460. + let expr0_0: Type = I8X16; + let expr1_0 = C::temp_writable_reg(ctx, expr0_0); + let expr2_0 = MInst::VecRRRLong { + alu_op: pattern0_0.clone(), + rd: expr1_0, + rn: pattern1_0, + rm: pattern2_0, + high_half: pattern3_0, + }; + let expr3_0 = C::emit(ctx, &expr2_0); + let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0); + return Some(expr4_0); +} + +// Generated as internal constructor for term vec_rrrr_long. +pub fn constructor_vec_rrrr_long( + ctx: &mut C, + arg0: &VecRRRLongOp, + arg1: Reg, + arg2: Reg, + arg3: Reg, + arg4: bool, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + let pattern3_0 = arg3; + let pattern4_0 = arg4; + // Rule at src/isa/aarch64/inst.isle line 1470. + let expr0_0: Type = I8X16; + let expr1_0 = C::temp_writable_reg(ctx, expr0_0); + let expr2_0 = MInst::FpuMove128 { + rd: expr1_0, + rn: pattern1_0, + }; + let expr3_0 = C::emit(ctx, &expr2_0); + let expr4_0 = MInst::VecRRRLong { + alu_op: pattern0_0.clone(), + rd: expr1_0, + rn: pattern2_0, + rm: pattern3_0, + high_half: pattern4_0, + }; + let expr5_0 = C::emit(ctx, &expr4_0); + let expr6_0 = C::writable_reg_to_reg(ctx, expr1_0); + return Some(expr6_0); +} + +// Generated as internal constructor for term vec_rr_narrow. +pub fn constructor_vec_rr_narrow( + ctx: &mut C, + arg0: &VecRRNarrowOp, + arg1: Reg, + arg2: bool, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/aarch64/inst.isle line 1478. + let expr0_0: Type = I8X16; + let expr1_0 = C::temp_writable_reg(ctx, expr0_0); + let expr2_0 = MInst::VecRRNarrow { + op: pattern0_0.clone(), + rd: expr1_0, + rn: pattern1_0, + high_half: pattern2_0, + }; + let expr3_0 = C::emit(ctx, &expr2_0); + let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0); + return Some(expr4_0); +} + +// Generated as internal constructor for term vec_rr_long. 
+pub fn constructor_vec_rr_long( + ctx: &mut C, + arg0: &VecRRLongOp, + arg1: Reg, + arg2: bool, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/aarch64/inst.isle line 1485. + let expr0_0: Type = I8X16; + let expr1_0 = C::temp_writable_reg(ctx, expr0_0); + let expr2_0 = MInst::VecRRLong { + op: pattern0_0.clone(), + rd: expr1_0, + rn: pattern1_0, + high_half: pattern2_0, + }; + let expr3_0 = C::emit(ctx, &expr2_0); + let expr4_0 = C::writable_reg_to_reg(ctx, expr1_0); + return Some(expr4_0); +} + // Generated as internal constructor for term imm. pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option { let pattern0_0 = arg0; if let Some(pattern1_0) = C::integral_ty(ctx, pattern0_0) { let pattern2_0 = arg1; if let Some(pattern3_0) = C::imm_logic_from_u64(ctx, pattern2_0) { - // Rule at src/isa/aarch64/inst.isle line 1471. + // Rule at src/isa/aarch64/inst.isle line 1503. let expr0_0 = ALUOp::Orr64; let expr1_0 = C::zero_reg(ctx); let expr2_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, expr1_0, pattern3_0)?; return Some(expr2_0); } if let Some(pattern3_0) = C::move_wide_const_from_u64(ctx, pattern2_0) { - // Rule at src/isa/aarch64/inst.isle line 1463. + // Rule at src/isa/aarch64/inst.isle line 1495. let expr0_0 = OperandSize::Size64; let expr1_0 = constructor_movz(ctx, pattern3_0, &expr0_0)?; return Some(expr1_0); } if let Some(pattern3_0) = C::move_wide_const_from_negated_u64(ctx, pattern2_0) { - // Rule at src/isa/aarch64/inst.isle line 1467. + // Rule at src/isa/aarch64/inst.isle line 1499. let expr0_0 = OperandSize::Size64; let expr1_0 = constructor_movn(ctx, pattern3_0, &expr0_0)?; return Some(expr1_0); } - // Rule at src/isa/aarch64/inst.isle line 1478. + // Rule at src/isa/aarch64/inst.isle line 1510. let expr0_0 = C::load_constant64_full(ctx, pattern2_0); return Some(expr0_0); } @@ -1532,10 +1642,535 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/aarch64/lower.isle line 200. 
+ let expr0_0 = C::put_in_regs(ctx, pattern7_0); + let expr1_0: usize = 0; + let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); + let expr3_0: usize = 1; + let expr4_0 = C::value_regs_get(ctx, expr0_0, expr3_0); + let expr5_0 = C::put_in_regs(ctx, pattern7_1); + let expr6_0: usize = 0; + let expr7_0 = C::value_regs_get(ctx, expr5_0, expr6_0); + let expr8_0: usize = 1; + let expr9_0 = C::value_regs_get(ctx, expr5_0, expr8_0); + let expr10_0 = ALUOp::UMulH; + let expr11_0 = constructor_alu_rrr(ctx, &expr10_0, expr2_0, expr7_0)?; + let expr12_0 = ALUOp3::MAdd64; + let expr13_0 = + constructor_alu_rrrr(ctx, &expr12_0, expr2_0, expr9_0, expr11_0)?; + let expr14_0 = ALUOp3::MAdd64; + let expr15_0 = + constructor_alu_rrrr(ctx, &expr14_0, expr4_0, expr7_0, expr13_0)?; + let expr16_0 = ALUOp3::MAdd64; + let expr17_0 = C::zero_reg(ctx); + let expr18_0 = + constructor_alu_rrrr(ctx, &expr16_0, expr2_0, expr7_0, expr17_0)?; + let expr19_0 = C::value_regs(ctx, expr18_0, expr15_0); + return Some(expr19_0); + } _ => {} } } } + if pattern2_0 == I16X8 { + let pattern4_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Binary { + opcode: ref pattern5_0, + args: ref pattern5_1, + } = &pattern4_0 + { + if let &Opcode::Imul = &pattern5_0 { + let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); + if let Some(pattern8_0) = C::def_inst(ctx, pattern7_0) { + let pattern9_0 = C::inst_data(ctx, pattern8_0); + if let &InstructionData::Unary { + opcode: ref pattern10_0, + arg: pattern10_1, + } = &pattern9_0 + { + match &pattern10_0 { + &Opcode::SwidenLow => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I8X16 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::SwidenLow = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I8X16 { + // Rule at src/isa/aarch64/lower.isle line 302. + let expr0_0 = VecRRRLongOp::Smull8; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = false; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + &Opcode::SwidenHigh => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I8X16 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::SwidenHigh = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I8X16 { + // Rule at src/isa/aarch64/lower.isle line 308. 
+ let expr0_0 = VecRRRLongOp::Smull8; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = true; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + &Opcode::UwidenLow => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I8X16 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::UwidenLow = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I8X16 { + // Rule at src/isa/aarch64/lower.isle line 314. + let expr0_0 = VecRRRLongOp::Umull8; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = false; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + &Opcode::UwidenHigh => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I8X16 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::UwidenHigh = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I8X16 { + // Rule at src/isa/aarch64/lower.isle line 320. + let expr0_0 = VecRRRLongOp::Umull8; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = true; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + _ => {} + } + } + } + } + } + } + if pattern2_0 == I32X4 { + let pattern4_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Binary { + opcode: ref pattern5_0, + args: ref pattern5_1, + } = &pattern4_0 + { + if let &Opcode::Imul = &pattern5_0 { + let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); + if let Some(pattern8_0) = C::def_inst(ctx, pattern7_0) { + let pattern9_0 = C::inst_data(ctx, pattern8_0); + if let &InstructionData::Unary { + opcode: ref pattern10_0, + arg: pattern10_1, + } = &pattern9_0 + { + match &pattern10_0 { + &Opcode::SwidenLow => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I16X8 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::SwidenLow = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I16X8 { + // Rule at src/isa/aarch64/lower.isle line 326. 
+ let expr0_0 = VecRRRLongOp::Smull16; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = false; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + &Opcode::SwidenHigh => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I16X8 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::SwidenHigh = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I16X8 { + // Rule at src/isa/aarch64/lower.isle line 332. + let expr0_0 = VecRRRLongOp::Smull16; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = true; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + &Opcode::UwidenLow => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I16X8 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::UwidenLow = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I16X8 { + // Rule at src/isa/aarch64/lower.isle line 338. + let expr0_0 = VecRRRLongOp::Umull16; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = false; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + &Opcode::UwidenHigh => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I16X8 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::UwidenHigh = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I16X8 { + // Rule at src/isa/aarch64/lower.isle line 344. 
+ let expr0_0 = VecRRRLongOp::Umull16; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = true; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + _ => {} + } + } + } + } + } + } + if pattern2_0 == I64X2 { + let pattern4_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Binary { + opcode: ref pattern5_0, + args: ref pattern5_1, + } = &pattern4_0 + { + if let &Opcode::Imul = &pattern5_0 { + let (pattern7_0, pattern7_1) = C::unpack_value_array_2(ctx, &pattern5_1); + if let Some(pattern8_0) = C::def_inst(ctx, pattern7_0) { + let pattern9_0 = C::inst_data(ctx, pattern8_0); + if let &InstructionData::Unary { + opcode: ref pattern10_0, + arg: pattern10_1, + } = &pattern9_0 + { + match &pattern10_0 { + &Opcode::SwidenLow => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I32X4 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::SwidenLow = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I32X4 { + // Rule at src/isa/aarch64/lower.isle line 350. + let expr0_0 = VecRRRLongOp::Smull32; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = false; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + &Opcode::SwidenHigh => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I32X4 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::SwidenHigh = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I32X4 { + // Rule at src/isa/aarch64/lower.isle line 356. + let expr0_0 = VecRRRLongOp::Smull32; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = true; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + &Opcode::UwidenLow => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I32X4 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::UwidenLow = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I32X4 { + // Rule at src/isa/aarch64/lower.isle line 362. 
+ let expr0_0 = VecRRRLongOp::Umull32; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = false; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + &Opcode::UwidenHigh => { + let pattern12_0 = C::value_type(ctx, pattern10_1); + if pattern12_0 == I32X4 { + if let Some(pattern14_0) = C::def_inst(ctx, pattern7_1) { + let pattern15_0 = C::inst_data(ctx, pattern14_0); + if let &InstructionData::Unary { + opcode: ref pattern16_0, + arg: pattern16_1, + } = &pattern15_0 + { + if let &Opcode::UwidenHigh = &pattern16_0 { + let pattern18_0 = + C::value_type(ctx, pattern16_1); + if pattern18_0 == I32X4 { + // Rule at src/isa/aarch64/lower.isle line 368. + let expr0_0 = VecRRRLongOp::Umull32; + let expr1_0 = + C::put_in_reg(ctx, pattern10_1); + let expr2_0 = + C::put_in_reg(ctx, pattern16_1); + let expr3_0: bool = true; + let expr4_0 = constructor_vec_rrr_long( + ctx, &expr0_0, expr1_0, expr2_0, + expr3_0, + )?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } + } + } + } + _ => {} + } + } + } + // Rule at src/isa/aarch64/lower.isle line 261. + let expr0_0 = C::put_in_reg(ctx, pattern7_0); + let expr1_0 = C::put_in_reg(ctx, pattern7_1); + let expr2_0 = VecMisc2::Rev64; + let expr3_0 = VectorSize::Size32x4; + let expr4_0 = constructor_vec_misc(ctx, &expr2_0, expr1_0, &expr3_0)?; + let expr5_0 = VecALUOp::Mul; + let expr6_0 = VectorSize::Size32x4; + let expr7_0 = constructor_vec_rrr(ctx, &expr5_0, expr4_0, expr0_0, &expr6_0)?; + let expr8_0 = VecRRNarrowOp::Xtn64; + let expr9_0: bool = false; + let expr10_0 = constructor_vec_rr_narrow(ctx, &expr8_0, expr0_0, expr9_0)?; + let expr11_0 = VecALUOp::Addp; + let expr12_0 = VectorSize::Size32x4; + let expr13_0 = + constructor_vec_rrr(ctx, &expr11_0, expr7_0, expr7_0, &expr12_0)?; + let expr14_0 = VecRRNarrowOp::Xtn64; + let expr15_0: bool = false; + let expr16_0 = constructor_vec_rr_narrow(ctx, &expr14_0, expr1_0, expr15_0)?; + let expr17_0 = VecRRLongOp::Shll32; + let expr18_0: bool = false; + let expr19_0 = constructor_vec_rr_long(ctx, &expr17_0, expr13_0, expr18_0)?; + let expr20_0 = VecRRRLongOp::Umlal32; + let expr21_0: bool = false; + let expr22_0 = constructor_vec_rrrr_long( + ctx, &expr20_0, expr19_0, expr16_0, expr10_0, expr21_0, + )?; + let expr23_0 = C::value_reg(ctx, expr22_0); + return Some(expr23_0); + } + } + } let pattern3_0 = C::inst_data(ctx, pattern0_0); match &pattern3_0 { &InstructionData::NullAry { @@ -2029,6 +2664,19 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + let (pattern7_0, pattern7_1) = + C::unpack_value_array_2(ctx, &pattern5_1); + // Rule at src/isa/aarch64/lower.isle line 196. 
+ let expr0_0 = constructor_madd_op(ctx, pattern3_0)?; + let expr1_0 = C::put_in_reg(ctx, pattern7_0); + let expr2_0 = C::put_in_reg(ctx, pattern7_1); + let expr3_0 = C::zero_reg(ctx); + let expr4_0 = + constructor_alu_rrrr(ctx, &expr0_0, expr1_0, expr2_0, expr3_0)?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } _ => {} } } @@ -2128,6 +2776,27 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option {} } + if let Some(()) = C::not_i64x2(ctx, pattern3_0) { + let pattern5_0 = C::inst_data(ctx, pattern0_0); + if let &InstructionData::Binary { + opcode: ref pattern6_0, + args: ref pattern6_1, + } = &pattern5_0 + { + if let &Opcode::Imul = &pattern6_0 { + let (pattern8_0, pattern8_1) = C::unpack_value_array_2(ctx, &pattern6_1); + // Rule at src/isa/aarch64/lower.isle line 229. + let expr0_0 = VecALUOp::Mul; + let expr1_0 = C::put_in_reg(ctx, pattern8_0); + let expr2_0 = C::put_in_reg(ctx, pattern8_1); + let expr3_0 = constructor_vector_size(ctx, pattern3_0)?; + let expr4_0 = + constructor_vec_rrr(ctx, &expr0_0, expr1_0, expr2_0, &expr3_0)?; + let expr5_0 = C::value_reg(ctx, expr4_0); + return Some(expr5_0); + } + } + } } } return None; diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 396a9cf6d3..b46acd59de 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -71,102 +71,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Ineg => implemented_in_isle(ctx), - Opcode::Imul => { - let ty = ty.unwrap(); - if ty == I128 { - let lhs = put_input_in_regs(ctx, inputs[0]); - let rhs = put_input_in_regs(ctx, inputs[1]); - let dst = get_output_reg(ctx, outputs[0]); - assert_eq!(lhs.len(), 2); - assert_eq!(rhs.len(), 2); - assert_eq!(dst.len(), 2); - - // 128bit mul formula: - // dst_lo = lhs_lo * rhs_lo - // dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo) - // - // We can convert the above formula into the following - // umulh dst_hi, lhs_lo, rhs_lo - // madd dst_hi, lhs_lo, rhs_hi, dst_hi - // madd dst_hi, lhs_hi, rhs_lo, dst_hi - // mul dst_lo, lhs_lo, rhs_lo - - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::UMulH, - rd: dst.regs()[1], - rn: lhs.regs()[0], - rm: rhs.regs()[0], - }); - ctx.emit(Inst::AluRRRR { - alu_op: ALUOp3::MAdd64, - rd: dst.regs()[1], - rn: lhs.regs()[0], - rm: rhs.regs()[1], - ra: dst.regs()[1].to_reg(), - }); - ctx.emit(Inst::AluRRRR { - alu_op: ALUOp3::MAdd64, - rd: dst.regs()[1], - rn: lhs.regs()[1], - rm: rhs.regs()[0], - ra: dst.regs()[1].to_reg(), - }); - ctx.emit(Inst::AluRRRR { - alu_op: ALUOp3::MAdd64, - rd: dst.regs()[0], - rn: lhs.regs()[0], - rm: rhs.regs()[0], - ra: zero_reg(), - }); - } else if ty.is_vector() { - for ext_op in &[ - Opcode::SwidenLow, - Opcode::SwidenHigh, - Opcode::UwidenLow, - Opcode::UwidenHigh, - ] { - if let Some((alu_op, rn, rm, high_half)) = - match_vec_long_mul(ctx, insn, *ext_op) - { - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::VecRRRLong { - alu_op, - rd, - rn, - rm, - high_half, - }); - return Ok(()); - } - } - if ty == I64X2 { - lower_i64x2_mul(ctx, insn); - } else { - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::VecRRR { - alu_op: VecALUOp::Mul, - rd, - rn, - rm, - size: VectorSize::from_ty(ty), - }); - } - } else { - let alu_op = choose_32_64(ty, ALUOp3::MAdd32, 
ALUOp3::MAdd64); - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - ctx.emit(Inst::AluRRRR { - alu_op, - rd, - rn, - rm, - ra: zero_reg(), - }); - } - } + Opcode::Imul => implemented_in_isle(ctx), Opcode::Umulhi | Opcode::Smulhi => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest index cf052dd533..f2c580d485 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb -src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823 +src/prelude.isle fc3ca134da0df8e7309db0f6969c8f1db85ca7b7590d2e43552ef3134b9a55bd358a93e3aadf79d5c31d3fc95ce5c9c52f8313183c688259c027ee494913869c src/isa/x64/inst.isle 12dc8fa43cbba6e9c5cf46a2472e2754abfe33b7fd38f80e271afa3f6c002efad7a4202c8f00ff27d5e6176de8fec97e1887d382cbd4ef06eaac177a0b5992e3 src/isa/x64/lower.isle 333e1be62f602bb835a3cebc3299290a3d386438e9190d2db219263d974e097bfc3f1afdaac9401853806d21d548cad70bab2ffbc3b1cf5c3bebdd971a961f70 diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs index a2836b1614..d5273dae90 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs @@ -34,6 +34,7 @@ pub trait Context { fn fits_in_32(&mut self, arg0: Type) -> Option; fn fits_in_64(&mut self, arg0: Type) -> Option; fn vec128(&mut self, arg0: Type) -> Option; + fn not_i64x2(&mut self, arg0: Type) -> Option<()>; fn value_list_slice(&mut self, arg0: ValueList) -> ValueSlice; fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice); fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice); @@ -66,13 +67,13 @@ pub trait Context { fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8; } -/// Internal type ProducesFlags: defined at src/prelude.isle line 234. +/// Internal type ProducesFlags: defined at src/prelude.isle line 238. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlags { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 237. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 241. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlags { inst: MInst, result: Reg }, @@ -122,7 +123,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 247. + // Rule at src/prelude.isle line 251. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -150,7 +151,7 @@ pub fn constructor_with_flags_1( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 255. + // Rule at src/prelude.isle line 259. 
let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); return Some(pattern3_1); @@ -184,7 +185,7 @@ pub fn constructor_with_flags_2( result: pattern5_1, } = pattern4_0 { - // Rule at src/prelude.isle line 265. + // Rule at src/prelude.isle line 269. let expr0_0 = C::emit(ctx, &pattern1_0); let expr1_0 = C::emit(ctx, &pattern3_0); let expr2_0 = C::emit(ctx, &pattern5_0); diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 9098dc5df2..a93b2aff45 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -196,6 +196,14 @@ macro_rules! isle_prelude_methods { fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 { val } + + fn not_i64x2(&mut self, ty: Type) -> Option<()> { + if ty == I64X2 { + None + } else { + Some(()) + } + } }; } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 6c2af0b581..dd4ef9530b 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -156,6 +156,10 @@ (decl vec128 (Type) Type) (extern extractor vec128 vec128) +;; An extractor that matches everything except i64x2 +(decl not_i64x2 () Type) +(extern extractor not_i64x2 not_i64x2) + ;; Extractor to get a `ValueSlice` out of a `ValueList`. (decl value_list_slice (ValueSlice) ValueList) (extern extractor infallible value_list_slice value_list_slice)
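
Note on exercising these rules (not part of the patch above): the new ISLE lowerings match specific CLIF shapes — a plain `imul`, and an `imul` whose two operands come from `swiden_low`/`swiden_high`/`uwiden_low`/`uwiden_high` of the matching narrower type — so an aarch64 compile filetest is the natural way to confirm that each rule fires. The sketch below is illustrative only: the function names are invented, no file path is implied, and expected VCode is deliberately omitted rather than guessed.

test compile
target aarch64

; Expected to hit the generic vector multiply rule guarded by `not_i64x2`.
function %mul_i32x4(i32x4, i32x4) -> i32x4 {
block0(v0: i32x4, v1: i32x4):
    v2 = imul v0, v1
    return v2
}

; Expected to hit the dedicated $I64X2 rule
; (the rev64/mul/xtn/addp/xtn/shll/umlal sequence).
function %mul_i64x2(i64x2, i64x2) -> i64x2 {
block0(v0: i64x2, v1: i64x2):
    v2 = imul v0, v1
    return v2
}

; Expected to hit the extmul special case and lower to a single smull.
function %extmul_low_i8x16_s(i8x16, i8x16) -> i16x8 {
block0(v0: i8x16, v1: i8x16):
    v2 = swiden_low v0
    v3 = swiden_low v1
    v4 = imul v2, v3
    return v4
}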