diff --git a/cranelift/codegen/src/binemit/mod.rs b/cranelift/codegen/src/binemit/mod.rs index 90cff8bf51..a9d43d51e3 100644 --- a/cranelift/codegen/src/binemit/mod.rs +++ b/cranelift/codegen/src/binemit/mod.rs @@ -35,6 +35,10 @@ pub enum Reloc { X86CallPLTRel4, /// x86 GOT PC-relative 4-byte X86GOTPCRel4, + /// The 32-bit offset of the target from the beginning of its section. + /// Equivalent to `IMAGE_REL_AMD64_SECREL`. + /// See: [PE Format](https://docs.microsoft.com/en-us/windows/win32/debug/pe-format) + X86SecRel, /// Arm32 call target Arm32Call, /// Arm64 call target. Encoded as bottom 26 bits of instruction. This @@ -81,6 +85,7 @@ impl fmt::Display for Reloc { Self::X86CallPCRel4 => write!(f, "CallPCRel4"), Self::X86CallPLTRel4 => write!(f, "CallPLTRel4"), Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"), + Self::X86SecRel => write!(f, "SecRel"), Self::Arm32Call | Self::Arm64Call => write!(f, "Call"), Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"), diff --git a/cranelift/codegen/src/ir/known_symbol.rs b/cranelift/codegen/src/ir/known_symbol.rs index 8120899218..0dd5274d7e 100644 --- a/cranelift/codegen/src/ir/known_symbol.rs +++ b/cranelift/codegen/src/ir/known_symbol.rs @@ -9,6 +9,9 @@ use serde::{Deserialize, Serialize}; pub enum KnownSymbol { /// ELF well-known linker symbol _GLOBAL_OFFSET_TABLE_ ElfGlobalOffsetTable, + /// TLS index symbol for the current thread. + /// Used in COFF/PE file formats. + CoffTlsIndex, } impl fmt::Display for KnownSymbol { @@ -23,6 +26,7 @@ impl FromStr for KnownSymbol { fn from_str(s: &str) -> Result { match s { "ElfGlobalOffsetTable" => Ok(Self::ElfGlobalOffsetTable), + "CoffTlsIndex" => Ok(Self::CoffTlsIndex), _ => Err(()), } } @@ -38,5 +42,6 @@ mod tests { "ElfGlobalOffsetTable".parse(), Ok(KnownSymbol::ElfGlobalOffsetTable) ); + assert_eq!("CoffTlsIndex".parse(), Ok(KnownSymbol::CoffTlsIndex)); } } diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index bc6771d35f..329a1432d9 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -493,6 +493,10 @@ ;; `rax`. (MachOTlsGetAddr (symbol ExternalName)) + ;; A Coff TLS symbol access. Returns address of the TLS symbol in + ;; `rax`. + (CoffTlsGetAddr (symbol ExternalName)) + ;; An unwind pseudoinstruction describing the state of the machine at ;; this program point. (Unwind (inst UnwindInst)) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index fc02bd7571..64baebd95c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1,7 +1,7 @@ use crate::binemit::{Addend, Reloc}; use crate::ir::immediates::{Ieee32, Ieee64}; -use crate::ir::LibCall; use crate::ir::TrapCode; +use crate::ir::{KnownSymbol, LibCall}; use crate::isa::x64::encoding::evex::{EvexInstruction, EvexVectorLength}; use crate::isa::x64::encoding::rex::{ emit_simm, emit_std_enc_enc, emit_std_enc_mem, emit_std_reg_mem, emit_std_reg_reg, int_reg_enc, @@ -2952,6 +2952,52 @@ pub(crate) fn emit( sink.put1(0x17); } + Inst::CoffTlsGetAddr { ref symbol } => { + // See: https://gcc.godbolt.org/z/M8or9x6ss + // And: https://github.com/bjorn3/rustc_codegen_cranelift/issues/388#issuecomment-532930282 + + // Emit the following sequence + // movl (%rip), %eax ; IMAGE_REL_AMD64_REL32 _tls_index + // movq %gs:88, %rcx + // movq (%rcx,%rax,8), %rax + // leaq (%rax), %rax ; Reloc: IMAGE_REL_AMD64_SECREL symbol + + // Load TLS index for current thread + // movl (%rip), %eax + sink.put1(0x8b); // mov + sink.put1(0x05); + emit_reloc( + sink, + Reloc::X86PCRel4, + &ExternalName::KnownSymbol(KnownSymbol::CoffTlsIndex), + -4, + ); + sink.put4(0); // offset + + // movq %gs:88, %rcx + // Load the TLS Storage Array pointer + // The gs segment register refers to the base address of the TEB on x64. + // 0x58 is the offset in the TEB for the ThreadLocalStoragePointer member on x64: + sink.put_data(&[ + 0x65, 0x48, // REX.W + 0x8b, // MOV + 0x0c, 0x25, 0x58, // 0x58 - ThreadLocalStoragePointer offset + 0x00, 0x00, 0x00, + ]); + + // movq (%rcx,%rax,8), %rax + // Load the actual TLS entry for this thread. + // Computes ThreadLocalStoragePointer + _tls_index*8 + sink.put_data(&[0x48, 0x8b, 0x04, 0xc1]); + + // leaq (%rax), %rax + sink.put1(0x48); + sink.put1(0x8d); + sink.put1(0x80); + emit_reloc(sink, Reloc::X86SecRel, symbol, 0); + sink.put4(0); // offset + } + Inst::Unwind { ref inst } => { sink.add_unwind(inst.clone()); } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 088c0bd15f..0bca3f9f65 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -4688,6 +4688,17 @@ fn test_x64_emit() { "%rax = macho_tls_get_addr User { namespace: 0, index: 0 }", )); + insns.push(( + Inst::CoffTlsGetAddr { + symbol: ExternalName::User { + namespace: 0, + index: 0, + }, + }, + "8B050000000065488B0C2558000000488B04C1488D8000000000", + "%rax = coff_tls_get_addr User { namespace: 0, index: 0 }", + )); + // ======================================================== // Actually run the tests! let mut flag_builder = settings::builder(); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 4b2a02a02c..83f61f993d 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -116,6 +116,7 @@ impl Inst { | Inst::XmmUninitializedValue { .. } | Inst::ElfTlsGetAddr { .. } | Inst::MachOTlsGetAddr { .. } + | Inst::CoffTlsGetAddr { .. } | Inst::Unwind { .. } | Inst::DummyUse { .. } => smallvec![], @@ -1709,6 +1710,10 @@ impl PrettyPrint for Inst { format!("%rax = macho_tls_get_addr {:?}", symbol) } + Inst::CoffTlsGetAddr { ref symbol } => { + format!("%rax = coff_tls_get_addr {:?}", symbol) + } + Inst::Unwind { inst } => { format!("unwind {:?}", inst) } @@ -2155,6 +2160,17 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_clobbers(clobbers); } + Inst::CoffTlsGetAddr { .. } => { + // We also use the gs register. But that register is not allocatable by the + // register allocator, so we don't need to mark it as used here. + + // We use %rax to set the address + collector.reg_def(Writable::from_reg(regs::rax())); + + // We use %rcx as a temporary variable to load the _tls_index + collector.reg_def(Writable::from_reg(regs::rcx())); + } + Inst::Unwind { .. } => {} Inst::DummyUse { reg } => { diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index ebf2eca435..c3bd38710a 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -2222,28 +2222,30 @@ fn lower_insn_to_regs>( ctx.emit(Inst::gen_move(dst_hi, src.regs()[1], types::I64)); } - Opcode::TlsValue => match flags.tls_model() { - TlsModel::ElfGd => { - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let (name, _, _) = ctx.symbol_value(insn).unwrap(); - let symbol = name.clone(); - ctx.emit(Inst::ElfTlsGetAddr { symbol }); - ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); - } - TlsModel::Macho => { - let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let (name, _, _) = ctx.symbol_value(insn).unwrap(); - let symbol = name.clone(); - ctx.emit(Inst::MachOTlsGetAddr { symbol }); - ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); - } - _ => { - todo!( + Opcode::TlsValue => { + let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let (name, _, _) = ctx.symbol_value(insn).unwrap(); + let symbol = name.clone(); + + match flags.tls_model() { + TlsModel::ElfGd => { + ctx.emit(Inst::ElfTlsGetAddr { symbol }); + ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); + } + TlsModel::Macho => { + ctx.emit(Inst::MachOTlsGetAddr { symbol }); + ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); + } + TlsModel::Coff => { + ctx.emit(Inst::CoffTlsGetAddr { symbol }); + ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); + } + _ => todo!( "Unimplemented TLS model in x64 backend: {:?}", flags.tls_model() - ); + ), } - }, + } Opcode::SqmulRoundSat => { // Lane-wise saturating rounding multiplication in Q15 format diff --git a/cranelift/filetests/filetests/isa/x64/tls_coff.clif b/cranelift/filetests/filetests/isa/x64/tls_coff.clif new file mode 100644 index 0000000000..ad05bd5a52 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/tls_coff.clif @@ -0,0 +1,20 @@ +test compile precise-output +set tls_model=coff +target x86_64 + + +function u0:0(i32) -> i64 { +gv0 = symbol colocated tls u1:0 + +block0(v0: i32): + v1 = global_value.i64 gv0 + return v1 +} + +; pushq %rbp +; movq %rsp, %rbp +; block0: +; %rax = coff_tls_get_addr User { namespace: 1, index: 0 } +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/module/src/lib.rs b/cranelift/module/src/lib.rs index 36ed9abd54..5997e14709 100644 --- a/cranelift/module/src/lib.rs +++ b/cranelift/module/src/lib.rs @@ -50,8 +50,8 @@ pub use crate::traps::TrapSite; /// Version number of this crate. pub const VERSION: &str = env!("CARGO_PKG_VERSION"); -/// Default names for `ir::LibCall`s. A function by this name is imported into the object as -/// part of the translation of a `ir::ExternalName::LibCall` variant. +/// Default names for [ir::LibCall]s. A function by this name is imported into the object as +/// part of the translation of a [ir::ExternalName::LibCall] variant. pub fn default_libcall_names() -> Box String + Send + Sync> { Box::new(move |libcall| match libcall { ir::LibCall::Probestack => "__cranelift_probestack".to_owned(), diff --git a/cranelift/object/src/backend.rs b/cranelift/object/src/backend.rs index a619f5b02c..88cca41bbe 100644 --- a/cranelift/object/src/backend.rs +++ b/cranelift/object/src/backend.rs @@ -40,10 +40,10 @@ impl ObjectBuilder { /// Create a new `ObjectBuilder` using the given Cranelift target, that /// can be passed to [`ObjectModule::new`]. /// - /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall` + /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s [ir::LibCall] /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain /// floating point instructions, and for stack probes. If you don't know what to use for this - /// argument, use `cranelift_module::default_libcall_names()`. + /// argument, use [cranelift_module::default_libcall_names](). pub fn new>>( isa: Box, name: V, @@ -556,9 +556,9 @@ impl ObjectModule { if let Some(symbol) = self.known_symbols.get(known_symbol) { *symbol } else { - let symbol = match known_symbol { - ir::KnownSymbol::ElfGlobalOffsetTable => self.object.add_symbol(Symbol { - name: "_GLOBAL_OFFSET_TABLE_".as_bytes().to_vec(), + let symbol = self.object.add_symbol(match known_symbol { + ir::KnownSymbol::ElfGlobalOffsetTable => Symbol { + name: b"_GLOBAL_OFFSET_TABLE_".to_vec(), value: 0, size: 0, kind: SymbolKind::Data, @@ -566,8 +566,18 @@ impl ObjectModule { weak: false, section: SymbolSection::Undefined, flags: SymbolFlags::None, - }), - }; + }, + ir::KnownSymbol::CoffTlsIndex => Symbol { + name: b"_tls_index".to_vec(), + value: 0, + size: 32, + kind: SymbolKind::Tls, + scope: SymbolScope::Unknown, + weak: false, + section: SymbolSection::Undefined, + flags: SymbolFlags::None, + }, + }); self.known_symbols.insert(*known_symbol, symbol); symbol } @@ -590,6 +600,11 @@ impl ObjectModule { RelocationEncoding::X86Branch, 32, ), + Reloc::X86SecRel => ( + RelocationKind::SectionOffset, + RelocationEncoding::Generic, + 32, + ), Reloc::X86GOTPCRel4 => (RelocationKind::GotRelative, RelocationEncoding::Generic, 32), Reloc::Arm64Call => ( RelocationKind::Relative,