diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs index 2675937353..b15be24546 100644 --- a/cranelift/codegen/meta/src/isa/x86/instructions.rs +++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs @@ -342,9 +342,9 @@ pub(crate) fn define( The lane index, ``Idx``, is an immediate value, not an SSA value. It must indicate a valid lane index for the type of ``x``. "#, - &formats.insert_lane, + &formats.ternary_imm8, ) - .operands_in(vec![x, Idx, y]) + .operands_in(vec![x, y, Idx]) .operands_out(vec![a]), ); @@ -369,9 +369,9 @@ pub(crate) fn define( extracted from and which it is inserted to. This is similar to x86_pinsr but inserts floats, which are already stored in an XMM register. "#, - &formats.insert_lane, + &formats.ternary_imm8, ) - .operands_in(vec![x, Idx, y]) + .operands_in(vec![x, y, Idx]) .operands_out(vec![a]), ); diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs index 13da4a365a..5d7e3c7619 100644 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -460,7 +460,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro // Move into the lowest 16 bits of an XMM register. def!(a = scalar_to_vector(x)), // Insert the value again but in the next lowest 16 bits. - def!(b = insertlane(a, uimm8_one, x)), + def!(b = insertlane(a, x, uimm8_one)), // No instruction emitted; pretend this is an I32x4 so we can use PSHUFD. def!(c = raw_bitcast_any16x8_to_i32x4(b)), // Broadcast the bytes in the XMM register with PSHUFD. @@ -494,7 +494,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro // Move into the lowest 64 bits of an XMM register. def!(a = scalar_to_vector(x)), // Move into the highest 64 bits of the same XMM register. - def!(y = insertlane(a, uimm8_one, x)), + def!(y = insertlane(a, x, uimm8_one)), ], ); } @@ -568,11 +568,11 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro // Use scalar operations to shift the first lane. def!(a = extractlane(x, uimm8_zero)), def!(b = sshr_scalar_lane0(a, y)), - def!(c = insertlane(x, uimm8_zero, b)), + def!(c = insertlane(x, b, uimm8_zero)), // Do the same for the second lane. def!(d = extractlane(x, uimm8_one)), def!(e = sshr_scalar_lane1(d, y)), - def!(z = insertlane(c, uimm8_one, e)), + def!(z = insertlane(c, e, uimm8_one)), ], ); } diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs index 42e45d0328..db28f73e03 100644 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -608,12 +608,12 @@ pub(crate) fn define<'shared>( // XX /r with FPR ins and outs. A form with a byte immediate. { recipes.add_template_inferred( - EncodingRecipeBuilder::new("fa_ib", &formats.insert_lane, 2) + EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2) .operands_in(vec![fpr, fpr]) .operands_out(vec![0]) .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.insert_lane, - "lane", + &*formats.ternary_imm8, + "imm", 8, 0, )) @@ -621,7 +621,7 @@ pub(crate) fn define<'shared>( r#" {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); modrm_rr(in_reg1, in_reg0, sink); - let imm:i64 = lane.into(); + let imm: i64 = imm.into(); sink.put1(imm as u8); "#, ), @@ -1040,12 +1040,12 @@ pub(crate) fn define<'shared>( // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane) { recipes.add_template_inferred( - EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.insert_lane, 2) + EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2) .operands_in(vec![fpr, gpr]) .operands_out(vec![0]) .inst_predicate(InstructionPredicate::new_is_unsigned_int( - &*formats.insert_lane, - "lane", + &*formats.ternary_imm8, + "imm", 8, 0, )) @@ -1053,7 +1053,7 @@ pub(crate) fn define<'shared>( r#" {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); modrm_rr(in_reg1, in_reg0, sink); - let imm:i64 = lane.into(); + let imm: i64 = imm.into(); sink.put1(imm as u8); "#, ), diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs index 03c09e2e2b..455920eba5 100644 --- a/cranelift/codegen/meta/src/shared/formats.rs +++ b/cranelift/codegen/meta/src/shared/formats.rs @@ -24,7 +24,6 @@ pub(crate) struct Formats { pub(crate) func_addr: Rc, pub(crate) heap_addr: Rc, pub(crate) indirect_jump: Rc, - pub(crate) insert_lane: Rc, pub(crate) int_compare: Rc, pub(crate) int_compare_imm: Rc, pub(crate) int_cond: Rc, @@ -45,6 +44,7 @@ pub(crate) struct Formats { pub(crate) store_complex: Rc, pub(crate) table_addr: Rc, pub(crate) ternary: Rc, + pub(crate) ternary_imm8: Rc, pub(crate) trap: Rc, pub(crate) unary: Rc, pub(crate) unary_bool: Rc, @@ -88,18 +88,18 @@ impl Formats { .typevar_operand(1) .build(), + ternary_imm8: Builder::new("TernaryImm8") + .value() + .imm(&imm.uimm8) + .value() + .build(), + // Catch-all for instructions with many outputs and inputs and no immediate // operands. multiary: Builder::new("MultiAry").varargs().build(), nullary: Builder::new("NullAry").build(), - insert_lane: Builder::new("InsertLane") - .value() - .imm_with_name("lane", &imm.uimm8) - .value() - .build(), - extract_lane: Builder::new("ExtractLane") .value() .imm_with_name("lane", &imm.uimm8) diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 3a85f80599..f2bed0e330 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -559,9 +559,9 @@ fn define_simd_lane_access( The lane index, ``Idx``, is an immediate value, not an SSA value. It must indicate a valid lane index for the type of ``x``. "#, - &formats.insert_lane, + &formats.ternary_imm8, ) - .operands_in(vec![x, Idx, y]) + .operands_in(vec![x, y, Idx]) .operands_out(vec![a]), ); diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index 1d071d643b..a2897c6d78 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -1251,10 +1251,10 @@ fn convert_insertlane( let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); - if let ir::InstructionData::InsertLane { + if let ir::InstructionData::TernaryImm8 { opcode: ir::Opcode::Insertlane, args: [vector, replacement], - lane, + imm: lane, } = pos.func.dfg[inst] { let value_type = pos.func.dfg.value_type(vector); @@ -1269,7 +1269,7 @@ fn convert_insertlane( pos.func .dfg .replace(inst) - .x86_insertps(vector, immediate, replacement) + .x86_insertps(vector, replacement, immediate) } F64X2 => { let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types @@ -1297,7 +1297,7 @@ fn convert_insertlane( pos.func .dfg .replace(inst) - .x86_pinsr(vector, lane, replacement); + .x86_pinsr(vector, replacement, lane); } } } @@ -1340,7 +1340,7 @@ fn expand_dword_to_xmm<'f>( if arg_type == I64 { let (arg_lo, arg_hi) = pos.ins().isplit(arg); let arg = pos.ins().scalar_to_vector(I32X4, arg_lo); - let arg = pos.ins().insertlane(arg, 1, arg_hi); + let arg = pos.ins().insertlane(arg, arg_hi, 1); let arg = pos.ins().raw_bitcast(I64X2, arg); arg } else { diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index 7f6953c7e5..5010446309 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -758,7 +758,7 @@ impl<'a> Verifier<'a> { | Binary { .. } | BinaryImm { .. } | Ternary { .. } - | InsertLane { .. } + | TernaryImm8 { .. } | ExtractLane { .. } | Shuffle { .. } | IntCompare { .. } @@ -1918,14 +1918,14 @@ impl<'a> Verifier<'a> { arg, .. } - | ir::InstructionData::InsertLane { + | ir::InstructionData::TernaryImm8 { opcode: ir::instructions::Opcode::Insertlane, - lane, + imm: lane, args: [arg, _], .. } => { // We must be specific about the opcodes above because other instructions are using - // the ExtractLane/InsertLane formats. + // the same formats. let ty = self.func.dfg.value_type(arg); if u16::from(lane) >= ty.lane_count() { errors.fatal(( diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index acf181af2b..0aada7d79d 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -518,7 +518,7 @@ pub fn write_operands( } } NullAry { .. } => write!(w, " "), - InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]), + TernaryImm8 { imm, args, .. } => write!(w, " {}, {}, {}", args[0], args[1], imm), ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane), Shuffle { mask, args, .. } => { let data = dfg.immediates.get(mask).expect( diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif index 102719351b..7193aa2b54 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif @@ -91,10 +91,10 @@ block0: v2 = sshr v1, v0 ; check: v3 = x86_pextr v1, 0 ; nextln: v4 = sshr v3, v0 - ; nextln: v5 = x86_pinsr v1, 0, v4 + ; nextln: v5 = x86_pinsr v1, v4, 0 ; nextln: v6 = x86_pextr v1, 1 ; nextln: v7 = sshr v6, v0 - ; nextln: v2 = x86_pinsr v5, 1, v7 + ; nextln: v2 = x86_pinsr v5, v7, 1 return v2 } diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif index a1ffac1822..e5eea1f637 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif @@ -10,7 +10,7 @@ block0: [-, %rax] v0 = bconst.b8 true [-, %rbx] v1 = bconst.b8 false [-, %xmm0] v2 = splat.b8x16 v0 -[-, %xmm0] v3 = x86_pinsr v2, 10, v1 ; bin: 66 0f 3a 20 c3 0a +[-, %xmm0] v3 = x86_pinsr v2, v1, 10 ; bin: 66 0f 3a 20 c3 0a return } @@ -19,7 +19,7 @@ block0: [-, %rax] v0 = iconst.i16 4 [-, %rbx] v1 = iconst.i16 5 [-, %xmm1] v2 = splat.i16x8 v0 -[-, %xmm1] v3 = x86_pinsr v2, 4, v1 ; bin: 66 0f c4 cb 04 +[-, %xmm1] v3 = x86_pinsr v2, v1, 4 ; bin: 66 0f c4 cb 04 return } @@ -28,7 +28,7 @@ block0: [-, %rax] v0 = iconst.i32 42 [-, %rbx] v1 = iconst.i32 99 [-, %xmm4] v2 = splat.i32x4 v0 -[-, %xmm4] v3 = x86_pinsr v2, 2, v1 ; bin: 66 0f 3a 22 e3 02 +[-, %xmm4] v3 = x86_pinsr v2, v1, 2 ; bin: 66 0f 3a 22 e3 02 return } @@ -37,7 +37,7 @@ block0: [-, %rax] v0 = bconst.b64 true [-, %rbx] v1 = bconst.b64 false [-, %xmm2] v2 = splat.b64x2 v0 -[-, %xmm2] v3 = x86_pinsr v2, 1, v1 ; bin: 66 48 0f 3a 22 d3 01 +[-, %xmm2] v3 = x86_pinsr v2, v1, 1 ; bin: 66 48 0f 3a 22 d3 01 return } diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif index 5480116404..0f22ed3669 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif @@ -55,7 +55,7 @@ block0: ; check: block0: ; nextln: v0 = iconst.i64 42 ; nextln: v2 = scalar_to_vector.i64x2 v0 -; nextln: v1 = x86_pinsr v2, 1, v0 +; nextln: v1 = x86_pinsr v2, v0, 1 ; nextln: return v1 function %splat_b16() -> b16x8 { @@ -67,7 +67,7 @@ block0: ; check: block0: ; nextln: v0 = bconst.b16 true ; nextln: v2 = scalar_to_vector.b16x8 v0 -; nextln: v3 = x86_pinsr v2, 1, v0 +; nextln: v3 = x86_pinsr v2, v0, 1 ; nextln: v4 = raw_bitcast.i32x4 v3 ; nextln: v5 = x86_pshufd v4, 0 ; nextln: v1 = raw_bitcast.b16x8 v5 diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif index 115a0be7cb..00ebae26f6 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif @@ -62,7 +62,7 @@ block0: v1 = bconst.b8 true v2 = vconst.b8x16 [false false false false false false false false false false false false false false false false] - v3 = insertlane v2, 10, v1 + v3 = insertlane v2, v1, 10 v4 = extractlane v3, 10 return v4 } @@ -72,7 +72,7 @@ function %insertlane_f32() -> b1 { block0: v0 = f32const 0x42.42 v1 = vconst.f32x4 0x00 - v2 = insertlane v1, 1, v0 + v2 = insertlane v1, v0, 1 v3 = extractlane v2, 1 v4 = fcmp eq v3, v0 return v4 @@ -83,7 +83,7 @@ function %insertlane_f64_lane1() -> b1 { block0: v0 = f64const 0x42.42 v1 = vconst.f64x2 0x00 - v2 = insertlane v1, 1, v0 + v2 = insertlane v1, v0, 1 v3 = extractlane v2, 1 v4 = fcmp eq v3, v0 return v4 @@ -94,7 +94,7 @@ function %insertlane_f64_lane0() -> b1 { block0: v0 = f64const 0x42.42 v1 = vconst.f64x2 0x00 - v2 = insertlane v1, 0, v0 + v2 = insertlane v1, v0, 0 v3 = extractlane v2, 0 v4 = fcmp eq v3, v0 return v4 @@ -135,7 +135,7 @@ block0: v1 = iconst.i32 99 v2 = splat.i32x4 v0 - v3 = insertlane v2, 2, v1 + v3 = insertlane v2, v1, 2 v4 = extractlane v3, 3 v5 = icmp eq v4, v0 @@ -154,7 +154,7 @@ block0: v1 = f32const 0x99.99 v2 = splat.f32x4 v0 - v3 = insertlane v2, 2, v1 + v3 = insertlane v2, v1, 2 v4 = extractlane v3, 3 v5 = fcmp eq v4, v0 diff --git a/cranelift/filetests/filetests/parser/tiny.clif b/cranelift/filetests/filetests/parser/tiny.clif index 3f825e6eac..42fa5a8157 100644 --- a/cranelift/filetests/filetests/parser/tiny.clif +++ b/cranelift/filetests/filetests/parser/tiny.clif @@ -67,13 +67,13 @@ function %lanes() { block0: v0 = iconst.i32x4 2 v1 = extractlane v0, 3 - v2 = insertlane v0, 1, v1 + v2 = insertlane v0, v1, 1 } ; sameln: function %lanes() fast { ; nextln: block0: ; nextln: v0 = iconst.i32x4 2 ; nextln: v1 = extractlane v0, 3 -; nextln: v2 = insertlane v0, 1, v1 +; nextln: v2 = insertlane v0, v1, 1 ; nextln: } ; Integer condition codes. diff --git a/cranelift/filetests/filetests/verifier/simd-lane-index.clif b/cranelift/filetests/filetests/verifier/simd-lane-index.clif index 2f7ca8d095..b8051a6b5a 100644 --- a/cranelift/filetests/filetests/verifier/simd-lane-index.clif +++ b/cranelift/filetests/filetests/verifier/simd-lane-index.clif @@ -6,7 +6,7 @@ function %insertlane_i32x4() { block0: v0 = vconst.i32x4 [0 0 0 0] v1 = iconst.i32 42 - v2 = insertlane v0, 4, v1 ; error: The lane 4 does not index into the type i32x4 + v2 = insertlane v0, v1, 4 ; error: The lane 4 does not index into the type i32x4 return } @@ -14,7 +14,7 @@ function %insertlane_b16x8() { block0: v0 = vconst.b16x8 [false false false false false false false false] v1 = bconst.b16 true - v2 = insertlane v0, 8, v1 ; error: The lane 8 does not index into the type b16x8 + v2 = insertlane v0, v1, 8 ; error: The lane 8 does not index into the type b16x8 return } @@ -22,7 +22,7 @@ function %insertlane_f64x2() { block0: v0 = vconst.f64x2 0x00 v1 = f64const 0x0.1 - v2 = insertlane v0, 2, v1 ; error: The lane 2 does not index into the type f64x2 + v2 = insertlane v0, v1, 2 ; error: The lane 2 does not index into the type f64x2 return } diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 08942962ed..47e6e695fe 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -2887,15 +2887,15 @@ impl<'a> Parser<'a> { ctx.check_jt(table, self.loc)?; InstructionData::IndirectJump { opcode, arg, table } } - InstructionFormat::InsertLane => { + InstructionFormat::TernaryImm8 => { let lhs = self.match_value("expected SSA value first operand")?; self.match_token(Token::Comma, "expected ',' between operands")?; - let lane = self.match_uimm8("expected lane number")?; - self.match_token(Token::Comma, "expected ',' between operands")?; let rhs = self.match_value("expected SSA value last operand")?; - InstructionData::InsertLane { + self.match_token(Token::Comma, "expected ',' between operands")?; + let imm = self.match_uimm8("expected 8-bit immediate")?; + InstructionData::TernaryImm8 { opcode, - lane, + imm, args: [lhs, rhs], } } diff --git a/cranelift/serde/src/serde_clif_json.rs b/cranelift/serde/src/serde_clif_json.rs index 2d950cf3a8..efa0ee2815 100644 --- a/cranelift/serde/src/serde_clif_json.rs +++ b/cranelift/serde/src/serde_clif_json.rs @@ -41,6 +41,11 @@ pub enum SerInstData { opcode: String, args: [String; 3], }, + TernaryImm8 { + opcode: String, + args: [String; 2], + imm: String, + }, MultiAry { opcode: String, args: Vec, @@ -48,11 +53,6 @@ pub enum SerInstData { NullAry { opcode: String, }, - InsertLane { - opcode: String, - args: [String; 2], - lane: String, - }, ExtractLane { opcode: String, arg: String, @@ -323,12 +323,12 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData { InstructionData::NullAry { opcode } => SerInstData::NullAry { opcode: opcode.to_string(), }, - InstructionData::InsertLane { opcode, args, lane } => { + InstructionData::TernaryImm8 { opcode, args, imm } => { let hold_args = [args[0].to_string(), args[1].to_string()]; - SerInstData::InsertLane { + SerInstData::TernaryImm8 { opcode: opcode.to_string(), args: hold_args, - lane: lane.to_string(), + imm: imm.to_string(), } } InstructionData::ExtractLane { opcode, arg, lane } => SerInstData::ExtractLane { diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index f7a8ee8bfd..a8db0433bc 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1306,7 +1306,7 @@ pub fn translate_operator( let ty = type_of(op); let reduced = builder.ins().ireduce(ty.lane_type(), replacement); let vector = optionally_bitcast_vector(vector, ty, builder); - state.push1(builder.ins().insertlane(vector, *lane, reduced)) + state.push1(builder.ins().insertlane(vector, reduced, *lane)) } Operator::I32x4ReplaceLane { lane } | Operator::I64x2ReplaceLane { lane } @@ -1314,7 +1314,7 @@ pub fn translate_operator( | Operator::F64x2ReplaceLane { lane } => { let (vector, replacement) = state.pop2(); let vector = optionally_bitcast_vector(vector, type_of(op), builder); - state.push1(builder.ins().insertlane(vector, *lane, replacement)) + state.push1(builder.ins().insertlane(vector, replacement, *lane)) } Operator::V8x16Shuffle { lanes, .. } => { let (a, b) = pop2_with_bitcast(state, I8X16, builder);