diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest index f02a0fefe3..ee3279ed8e 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb -src/prelude.isle a069d14321601afc63959af23086709d67d189dafcdc7d1fc8534b32d89d49008acb8368b7b5a7bc51a353736a378197ac352ccce2bb3be89d93afb6979e480a +src/prelude.isle c1391bcd436c23caf46b909ba7b5a352405014f0c393e3886cf1b9ad37f610b0563e8a64daad215f107395e6bb55744d955dd9c6344bb19b96587c2deb703462 src/isa/aarch64/inst.isle 841748c9c5900821b7086a09a41c6dcdb2172eb47a45293b6ef10f2e1f1389620bf6a2c75152af807d8bc8929029a357af5191f5d87bac2c9ec54bf63a9a2a8f src/isa/aarch64/lower.isle b3cd0834484e543f39d477d47ee66042276e99955c21fb8c9340a5f27ac317936acb2907a30f758bf596066e36db801a179fda6dbcecaee758a0187a5a5f1412 diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs index 137cff579a..4ab9919db5 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs @@ -36,6 +36,7 @@ pub trait Context { fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice); fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice); fn writable_reg_to_reg(&mut self, arg0: WritableReg) -> Reg; + fn u8_from_uimm8(&mut self, arg0: Uimm8) -> u8; fn u64_from_imm64(&mut self, arg0: Imm64) -> u64; fn u64_from_ieee32(&mut self, arg0: Ieee32) -> u64; fn u64_from_ieee64(&mut self, arg0: Ieee64) -> u64; diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 
530f774858..33ac1fc785 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -1115,3 +1115,33 @@ (let ((dst WritableReg (temp_writable_reg ty)) (_ Unit (emit (MInst.GprToXmm op src dst size)))) (writable_reg_to_reg dst))) + +;; Helper for creating `pinsrb` instructions. +(decl pinsrb (Reg RegMem u8) Reg) +(rule (pinsrb src1 src2 lane) + (xmm_rm_r_imm (SseOpcode.Pinsrb) src1 src2 lane (OperandSize.Size32))) + +;; Helper for creating `pinsrw` instructions. +(decl pinsrw (Reg RegMem u8) Reg) +(rule (pinsrw src1 src2 lane) + (xmm_rm_r_imm (SseOpcode.Pinsrw) src1 src2 lane (OperandSize.Size32))) + +;; Helper for creating `pinsrd` instructions. +(decl pinsrd (Reg RegMem u8 OperandSize) Reg) +(rule (pinsrd src1 src2 lane size) + (xmm_rm_r_imm (SseOpcode.Pinsrd) src1 src2 lane size)) + +;; Helper for creating `insertps` instructions. +(decl insertps (Reg RegMem u8) Reg) +(rule (insertps src1 src2 lane) + (xmm_rm_r_imm (SseOpcode.Insertps) src1 src2 lane (OperandSize.Size32))) + +;; Helper for creating `movsd` instructions. +(decl movsd (Reg RegMem) Reg) +(rule (movsd src1 src2) + (xmm_rm_r $I8X16 (SseOpcode.Movsd) src1 src2)) + +;; Helper for creating `movlhps` instructions. 
+(decl movlhps (Reg RegMem) Reg) +(rule (movlhps src1 src2) + (xmm_rm_r $I8X16 (SseOpcode.Movlhps) src1 src2)) diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index fb688f3121..13b0aa77b8 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -964,3 +964,60 @@ ;; all-one value (rule (lower (has_type ty @ (multi_lane _bits _lanes) (bnot x))) (value_reg (sse_xor ty (put_in_reg x) (RegMem.Reg (vector_all_ones ty))))) + +;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (insertlane vec @ (value_type ty) val (u8_from_uimm8 idx))) + (value_reg (vec_insert_lane ty (put_in_reg vec) (put_in_reg_mem val) idx))) + +;; Helper function used below for `insertlane` but also here for other +;; lowerings. +;; +;; Note that the `Type` used here is the type of vector the insertion is +;; happening into, or the type of the first `Reg` argument. +(decl vec_insert_lane (Type Reg RegMem u8) Reg) + +;; i8x16.replace_lane +(rule (vec_insert_lane $I8X16 vec val idx) (pinsrb vec val idx)) + +;; i16x8.replace_lane +(rule (vec_insert_lane $I16X8 vec val idx) (pinsrw vec val idx)) + +;; i32x4.replace_lane +(rule (vec_insert_lane $I32X4 vec val idx) (pinsrd vec val idx (OperandSize.Size32))) + +;; i64x2.replace_lane +(rule (vec_insert_lane $I64X2 vec val idx) (pinsrd vec val idx (OperandSize.Size64))) + +;; f32x4.replace_lane +(rule (vec_insert_lane $F32X4 vec val idx) (insertps vec val (sse_insertps_lane_imm idx))) + +;; external rust code used to calculate the immediate value to `insertps` +(decl sse_insertps_lane_imm (u8) u8) +(extern constructor sse_insertps_lane_imm sse_insertps_lane_imm) + +;; f64x2.replace_lane 0 +;; +;; Here the `movsd` instruction is used specifically to specialize moving +;; into the fist lane where unlike above cases we're not using the lane +;; immediate as an immediate to the instruction itself. 
+;; +;; Note, though, the `movsd` has different behavior with respect to the second +;; lane of the f64x2 depending on whether the RegMem operand is a register or +;; memory. When loading from a register `movsd` preserves the upper bits, but +;; when loading from memory it zeros the upper bits. We specifically want to +;; preserve the upper bits so if a `RegMem.Mem` is passed in we need to emit +;; two `movsd` instructions. The first `movsd` (used as `xmm_unary_rm_r`) will +;; load from memory into a temp register and then the second `movsd` (modeled +;; internally as `xmm_rm_r` will merge the temp register into our `vec` +;; register. +(rule (vec_insert_lane $F64X2 vec (RegMem.Reg val) 0) (movsd vec (RegMem.Reg val))) +(rule (vec_insert_lane $F64X2 vec mem 0) + (movsd vec (RegMem.Reg (xmm_unary_rm_r (SseOpcode.Movsd) mem)))) + +;; f64x2.replace_lane 1 +;; +;; Here the `movlhps` instruction is used specifically to specialize moving +;; into the second lane where unlike above cases we're not using the lane +;; immediate as an immediate to the instruction itself. +(rule (vec_insert_lane $F64X2 vec val 1) (movlhps vec val)) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 972d264c4e..41e986f401 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -5641,22 +5641,11 @@ fn lower_insn_to_regs>( } Opcode::Insertlane => { - // The instruction format maps to variables like: %dst = insertlane %in_vec, %src, %lane - let ty = ty.unwrap(); - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let in_vec = put_input_in_reg(ctx, inputs[0]); - let src_ty = ctx.input_ty(insn, 1); - debug_assert!(!src_ty.is_vector()); - let src = input_to_reg_mem(ctx, inputs[1]); - let lane = if let InstructionData::TernaryImm8 { imm, .. 
} = ctx.data(insn) { - *imm - } else { - unreachable!(); - }; - debug_assert!(lane < ty.lane_count() as u8); - - ctx.emit(Inst::gen_move(dst, in_vec, ty)); - emit_insert_lane(ctx, src, dst, lane, ty.lane_type()); + unreachable!( + "implemented in ISLE: inst = `{}`, type = `{:?}`", + ctx.dfg().display_inst(insn), + ty + ); } Opcode::Extractlane => { diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 179b95fa72..12b4602c84 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -207,6 +207,13 @@ where None } } + + #[inline] + fn sse_insertps_lane_imm(&mut self, lane: u8) -> u8 { + // Insert 32-bits from replacement (at index 00, bits 7:6) to vector (lane + // shifted into bits 5:4). + 0b00_00_00_00 | lane << 4 + } } #[inline] diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest index 89fd2808d2..174aa529fd 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb -src/prelude.isle a069d14321601afc63959af23086709d67d189dafcdc7d1fc8534b32d89d49008acb8368b7b5a7bc51a353736a378197ac352ccce2bb3be89d93afb6979e480a -src/isa/x64/inst.isle fdfbfc6dfad1fc5ed252e0a14ccc69baba51d0538e05cfb9916f6213e5a6fcfc9d22605a29bd684d6a66f6d5e1c8ec36a963660d52c2e8b3fb6e0758f7adb7b5 -src/isa/x64/lower.isle 8555abdae385431c96aaabc392b7b3a8b1bbe733be08b007ef776850860cb77e85a140db02f427586c155c0b0129f9ffd531abd2e4a772c72667535cc015e609 +src/prelude.isle c1391bcd436c23caf46b909ba7b5a352405014f0c393e3886cf1b9ad37f610b0563e8a64daad215f107395e6bb55744d955dd9c6344bb19b96587c2deb703462 +src/isa/x64/inst.isle 
6065d3b9e0fa3361d179d9b87d09568ff474f8bac7eeabd29b328ace723041f96045bc82cfa2d7feda4490ce7e5d4be1a1c7ebe25c99916564d43a51550cd093 +src/isa/x64/lower.isle e51b7a67343dba342a43b3c9e4b9ed7df9b2c66a677018acf7054ba48c27e4e93a4421fd892b9bf7c0e5b790bcfafab7cb3e93ce2b8206c04d456918d2ad0b5a diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs index 61bac2a6b8..a62050787b 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs @@ -36,6 +36,7 @@ pub trait Context { fn unwrap_head_value_list_1(&mut self, arg0: ValueList) -> (Value, ValueSlice); fn unwrap_head_value_list_2(&mut self, arg0: ValueList) -> (Value, Value, ValueSlice); fn writable_reg_to_reg(&mut self, arg0: WritableReg) -> Reg; + fn u8_from_uimm8(&mut self, arg0: Uimm8) -> u8; fn u64_from_imm64(&mut self, arg0: Imm64) -> u64; fn u64_from_ieee32(&mut self, arg0: Ieee32) -> u64; fn u64_from_ieee64(&mut self, arg0: Ieee64) -> u64; @@ -58,6 +59,7 @@ pub trait Context { fn ext_mode(&mut self, arg0: u16, arg1: u16) -> ExtMode; fn emit(&mut self, arg0: &MInst) -> Unit; fn nonzero_u64_fits_in_u32(&mut self, arg0: u64) -> Option; + fn sse_insertps_lane_imm(&mut self, arg0: u8) -> u8; } /// Internal type ProducesFlags: defined at src/isa/x64/inst.isle line 392. @@ -1724,6 +1726,102 @@ pub fn constructor_gpr_to_xmm( return Some(expr3_0); } +// Generated as internal constructor for term pinsrb. +pub fn constructor_pinsrb( + ctx: &mut C, + arg0: Reg, + arg1: &RegMem, + arg2: u8, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/x64/inst.isle line 1121. + let expr0_0 = SseOpcode::Pinsrb; + let expr1_0 = OperandSize::Size32; + let expr2_0 = + constructor_xmm_rm_r_imm(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, &expr1_0)?; + return Some(expr2_0); +} + +// Generated as internal constructor for term pinsrw. 
+pub fn constructor_pinsrw( + ctx: &mut C, + arg0: Reg, + arg1: &RegMem, + arg2: u8, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/x64/inst.isle line 1126. + let expr0_0 = SseOpcode::Pinsrw; + let expr1_0 = OperandSize::Size32; + let expr2_0 = + constructor_xmm_rm_r_imm(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, &expr1_0)?; + return Some(expr2_0); +} + +// Generated as internal constructor for term pinsrd. +pub fn constructor_pinsrd( + ctx: &mut C, + arg0: Reg, + arg1: &RegMem, + arg2: u8, + arg3: &OperandSize, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + let pattern3_0 = arg3; + // Rule at src/isa/x64/inst.isle line 1131. + let expr0_0 = SseOpcode::Pinsrd; + let expr1_0 = constructor_xmm_rm_r_imm( + ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, + )?; + return Some(expr1_0); +} + +// Generated as internal constructor for term insertps. +pub fn constructor_insertps( + ctx: &mut C, + arg0: Reg, + arg1: &RegMem, + arg2: u8, +) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + let pattern2_0 = arg2; + // Rule at src/isa/x64/inst.isle line 1136. + let expr0_0 = SseOpcode::Insertps; + let expr1_0 = OperandSize::Size32; + let expr2_0 = + constructor_xmm_rm_r_imm(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, &expr1_0)?; + return Some(expr2_0); +} + +// Generated as internal constructor for term movsd. +pub fn constructor_movsd(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1141. + let expr0_0: Type = I8X16; + let expr1_0 = SseOpcode::Movsd; + let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; + return Some(expr2_0); +} + +// Generated as internal constructor for term movlhps. 
+pub fn constructor_movlhps(ctx: &mut C, arg0: Reg, arg1: &RegMem) -> Option { + let pattern0_0 = arg0; + let pattern1_0 = arg1; + // Rule at src/isa/x64/inst.isle line 1146. + let expr0_0: Type = I8X16; + let expr1_0 = SseOpcode::Movlhps; + let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; + return Some(expr2_0); +} + // Generated as internal constructor for term lower. pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { let pattern0_0 = arg0; @@ -1755,6 +1853,24 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { + if let &Opcode::Insertlane = &pattern2_0 { + let (pattern4_0, pattern4_1) = C::unpack_value_array_2(ctx, &pattern2_1); + let pattern5_0 = C::value_type(ctx, pattern4_0); + let pattern6_0 = C::u8_from_uimm8(ctx, pattern2_2); + // Rule at src/isa/x64/lower.isle line 970. + let expr0_0 = C::put_in_reg(ctx, pattern4_0); + let expr1_0 = C::put_in_reg_mem(ctx, pattern4_1); + let expr2_0 = + constructor_vec_insert_lane(ctx, pattern5_0, expr0_0, &expr1_0, pattern6_0)?; + let expr3_0 = C::value_reg(ctx, expr2_0); + return Some(expr3_0); + } + } _ => {} } if let Some(pattern1_0) = C::first_result(ctx, pattern0_0) { @@ -3967,3 +4083,85 @@ pub fn constructor_shr_i128( let expr49_0 = constructor_with_flags_2(ctx, &expr37_0, &expr41_0, &expr48_0)?; return Some(expr49_0); } + +// Generated as internal constructor for term vec_insert_lane. +pub fn constructor_vec_insert_lane( + ctx: &mut C, + arg0: Type, + arg1: Reg, + arg2: &RegMem, + arg3: u8, +) -> Option { + let pattern0_0 = arg0; + if pattern0_0 == I8X16 { + let pattern2_0 = arg1; + let pattern3_0 = arg2; + let pattern4_0 = arg3; + // Rule at src/isa/x64/lower.isle line 981. + let expr0_0 = constructor_pinsrb(ctx, pattern2_0, pattern3_0, pattern4_0)?; + return Some(expr0_0); + } + if pattern0_0 == I16X8 { + let pattern2_0 = arg1; + let pattern3_0 = arg2; + let pattern4_0 = arg3; + // Rule at src/isa/x64/lower.isle line 984. 
+ let expr0_0 = constructor_pinsrw(ctx, pattern2_0, pattern3_0, pattern4_0)?; + return Some(expr0_0); + } + if pattern0_0 == I32X4 { + let pattern2_0 = arg1; + let pattern3_0 = arg2; + let pattern4_0 = arg3; + // Rule at src/isa/x64/lower.isle line 987. + let expr0_0 = OperandSize::Size32; + let expr1_0 = constructor_pinsrd(ctx, pattern2_0, pattern3_0, pattern4_0, &expr0_0)?; + return Some(expr1_0); + } + if pattern0_0 == I64X2 { + let pattern2_0 = arg1; + let pattern3_0 = arg2; + let pattern4_0 = arg3; + // Rule at src/isa/x64/lower.isle line 990. + let expr0_0 = OperandSize::Size64; + let expr1_0 = constructor_pinsrd(ctx, pattern2_0, pattern3_0, pattern4_0, &expr0_0)?; + return Some(expr1_0); + } + if pattern0_0 == F32X4 { + let pattern2_0 = arg1; + let pattern3_0 = arg2; + let pattern4_0 = arg3; + // Rule at src/isa/x64/lower.isle line 993. + let expr0_0 = C::sse_insertps_lane_imm(ctx, pattern4_0); + let expr1_0 = constructor_insertps(ctx, pattern2_0, pattern3_0, expr0_0)?; + return Some(expr1_0); + } + if pattern0_0 == F64X2 { + let pattern2_0 = arg1; + let pattern3_0 = arg2; + if let &RegMem::Reg { reg: pattern4_0 } = pattern3_0 { + let pattern5_0 = arg3; + if pattern5_0 == 0 { + // Rule at src/isa/x64/lower.isle line 1014. + let expr0_0 = RegMem::Reg { reg: pattern4_0 }; + let expr1_0 = constructor_movsd(ctx, pattern2_0, &expr0_0)?; + return Some(expr1_0); + } + } + let pattern4_0 = arg3; + if pattern4_0 == 0 { + // Rule at src/isa/x64/lower.isle line 1015. + let expr0_0 = SseOpcode::Movsd; + let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern3_0)?; + let expr2_0 = RegMem::Reg { reg: expr1_0 }; + let expr3_0 = constructor_movsd(ctx, pattern2_0, &expr2_0)?; + return Some(expr3_0); + } + if pattern4_0 == 1 { + // Rule at src/isa/x64/lower.isle line 1023. 
+ let expr0_0 = constructor_movlhps(ctx, pattern2_0, pattern3_0)?; + return Some(expr0_0); + } + } + return None; +} diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 99e4c8f49f..ddc579a3b4 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -175,6 +175,10 @@ macro_rules! isle_prelude_methods { fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 { val.bits() } + + fn u8_from_uimm8(&mut self, val: Uimm8) -> u8 { + val + } }; } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index c1367cc575..4951c0bd7b 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -166,6 +166,10 @@ (decl writable_reg_to_reg (WritableReg) Reg) (extern constructor writable_reg_to_reg writable_reg_to_reg) +;; Extract a `u8` from an `Uimm8`. +(decl u8_from_uimm8 (u8) Uimm8) +(extern extractor infallible u8_from_uimm8 u8_from_uimm8) + ;; Extract a `u64` from an `Imm64`. (decl u64_from_imm64 (u64) Imm64) (extern extractor infallible u64_from_imm64 u64_from_imm64) diff --git a/tests/misc_testsuite/simd/replace-lane-preserve.wast b/tests/misc_testsuite/simd/replace-lane-preserve.wast new file mode 100644 index 0000000000..2f7903d9d2 --- /dev/null +++ b/tests/misc_testsuite/simd/replace-lane-preserve.wast @@ -0,0 +1,12 @@ +;; originally from #3216 +(module + (func (result i64) + v128.const i64x2 -1 1 + global.get 0 + f64x2.replace_lane 0 + i64x2.extract_lane 1 + ) + (global f64 (f64.const 1)) + (export "" (func 0))) + +(assert_return (invoke "") (i64.const 1))