diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 1a0568a9d6..363299c975 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -2003,7 +2003,7 @@ pub(crate) fn emit( sink.bind_label(constant_end_label); } - Inst::XmmFakeDef { .. } => { + Inst::XmmUninitializedValue { .. } => { // This instruction format only exists to declare a register as a `def`; no code is // emitted. } diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index c334956f8a..f15521d35c 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -342,10 +342,6 @@ pub enum Inst { is64: bool, }, - /// Provides a way to tell the register allocator that the upcoming sequence of instructions - /// will overwrite `dst` so it should be considered as a `def`; use with care. - XmmFakeDef { dst: Writable<Reg> }, - // ===================================== // Control flow instructions. /// Direct call: call simm32. @@ -486,6 +482,20 @@ pub enum Inst { /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This /// controls how MemArg::NominalSPOffset args are lowered. VirtualSPOffsetAdj { offset: i64 }, + + /// Provides a way to tell the register allocator that the upcoming sequence of instructions + /// will overwrite `dst` so it should be considered as a `def`; use this with care. + /// + /// This is useful when we have a sequence of instructions whose register usages are nominally + /// `mod`s, but such that the combination of operations creates a result that is independent of + /// the initial register value. It's thus semantically a `def`, not a `mod`, when all the + /// instructions are taken together, so we want to ensure the register is defined (its + /// live-range starts) prior to the sequence to keep analyses happy.
+ /// + /// One alternative would be a compound instruction that somehow encapsulates the others and + /// reports its own `def`s/`use`s/`mod`s; this adds complexity (the instruction list is no + /// longer flat) and requires knowledge about semantics and initial-value independence anyway. + XmmUninitializedValue { dst: Writable<Reg> }, } pub(crate) fn low32_will_sign_extend_to_64(x: u64) -> bool { @@ -644,9 +654,9 @@ impl Inst { Inst::XMM_RM_R { op, src, dst } } - pub(crate) fn xmm_fake_def(dst: Writable<Reg>) -> Self { + pub(crate) fn xmm_uninit_value(dst: Writable<Reg>) -> Self { debug_assert!(dst.to_reg().get_class() == RegClass::V128); - Inst::XmmFakeDef { dst } + Inst::XmmUninitializedValue { dst } } pub(crate) fn xmm_mov_r_m( @@ -1333,9 +1343,9 @@ impl ShowWithRRU for Inst { dst.show_rru(mb_rru), ), - Inst::XmmFakeDef { dst } => format!( + Inst::XmmUninitializedValue { dst } => format!( "{} {}", - ljustify("fake_def".into()), + ljustify("uninit".into()), dst.show_rru(mb_rru), ), @@ -1769,7 +1779,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_mod(*dst); } } - Inst::XmmFakeDef { dst } => collector.add_def(*dst), + Inst::XmmUninitializedValue { dst } => collector.add_def(*dst), Inst::XmmLoadConstSeq { dst, .. } => collector.add_def(*dst), Inst::XmmMinMaxSeq { lhs, rhs_dst, .. } => { collector.add_use(*lhs); @@ -2104,7 +2114,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) { src.map_uses(mapper); map_mod(mapper, dst); } - Inst::XmmFakeDef { ref mut dst, .. } => { + Inst::XmmUninitializedValue { ref mut dst, .. } => { map_def(mapper, dst); } Inst::XmmLoadConstSeq { ref mut dst, ..
} => { diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 249f803305..4b8a4e1805 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2995,7 +2995,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( // We know that splat will overwrite all of the lanes of `dst` but it takes several // instructions to do so. Because of the multiple instructions, there is no good way to // declare `dst` a `def` except with the following pseudo-instruction. - ctx.emit(Inst::xmm_fake_def(dst)); + ctx.emit(Inst::xmm_uninit_value(dst)); match ty.lane_bits() { 8 => { emit_insert_lane(ctx, src, dst, 0, ty.lane_type()); diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif index 17c04df772..5d397409ab 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif @@ -58,7 +58,7 @@ block0(v0: i8): v1 = splat.i8x16 v0 return v1 } -; check: fake_def %xmm0 +; check: uninit %xmm0 ; nextln: pinsrb $$0, %rdi, %xmm0 ; nextln: pxor %xmm1, %xmm1 ; nextln: pshufb %xmm1, %xmm0 @@ -69,7 +69,7 @@ block0: v1 = splat.b16x8 v0 return v1 } -; check: fake_def %xmm0 +; check: uninit %xmm0 ; nextln: pinsrw $$0, %r12, %xmm0 ; nextln: pinsrw $$1, %r12, %xmm0 ; nextln: pshufd $$0, %xmm0, %xmm0 @@ -79,7 +79,7 @@ block0(v0: i32): v1 = splat.i32x4 v0 return v1 } -; check: fake_def %xmm0 +; check: uninit %xmm0 ; nextln: pinsrd $$0, %rdi, %xmm0 ; nextln: pshufd $$0, %xmm0, %xmm0 @@ -88,6 +88,6 @@ block0(v0: f64): v1 = splat.f64x2 v0 return v1 } -; check: fake_def %xmm1 +; check: uninit %xmm1 ; nextln: movsd %xmm0, %xmm1 ; nextln: movlhps %xmm0, %xmm1