cranelift: Implement TLS on aarch64 Mach-O (Apple Silicon) (#5434)

* Implement TLS on Aarch64 Mach-O

* Add aarch64 Mach-O TLS filetest

* Address review comments

- `Aarch64` instead of `AArch64` in comments
- Remove unnecessary guard in tls_value lowering
- Remove unnecessary regalloc metadata in emission

* Use x1 as temporary register in emission

- Instead of passing in a temporary register to use when emitting
the TLS code, just use `x1`, as it's already in the clobber set.
This also keeps the size of `aarch64::inst::Inst` at 32 bytes.
- Update filetest accordingly

* Update aarch64 Mach-O TLS filetest
This commit is contained in:
Nathan Whitaker
2023-03-24 10:54:01 -07:00
committed by GitHub
parent d0570a77ce
commit c3decdf910
7 changed files with 173 additions and 4 deletions

View File

@@ -56,12 +56,20 @@ pub enum Reloc {
/// Mach-O x86_64 32 bit signed PC relative offset to a `__thread_vars` entry. /// Mach-O x86_64 32 bit signed PC relative offset to a `__thread_vars` entry.
MachOX86_64Tlv, MachOX86_64Tlv,
/// AArch64 TLS GD /// Mach-O Aarch64 TLS
/// PC-relative distance to the page of the TLVP slot.
MachOAarch64TlsAdrPage21,
/// Mach-O Aarch64 TLS
/// Offset within page of TLVP slot.
MachOAarch64TlsAdrPageOff12,
/// Aarch64 TLS GD
/// Set an ADRP immediate field to the top 21 bits of the final address. Checks for overflow. /// Set an ADRP immediate field to the top 21 bits of the final address. Checks for overflow.
/// This is equivalent to `R_AARCH64_TLSGD_ADR_PAGE21` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#relocations-for-thread-local-storage) /// This is equivalent to `R_AARCH64_TLSGD_ADR_PAGE21` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#relocations-for-thread-local-storage)
Aarch64TlsGdAdrPage21, Aarch64TlsGdAdrPage21,
/// AArch64 TLS GD /// Aarch64 TLS GD
/// Set the add immediate field to the low 12 bits of the final address. Does not check for overflow. /// Set the add immediate field to the low 12 bits of the final address. Does not check for overflow.
/// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12_NC` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#relocations-for-thread-local-storage) /// This is equivalent to `R_AARCH64_TLSGD_ADD_LO12_NC` in the [aaelf64](https://github.com/ARM-software/abi-aa/blob/2bcab1e3b22d55170c563c3c7940134089176746/aaelf64/aaelf64.rst#relocations-for-thread-local-storage)
Aarch64TlsGdAddLo12Nc, Aarch64TlsGdAddLo12Nc,
@@ -109,6 +117,8 @@ impl fmt::Display for Reloc {
Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"), Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"),
Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"), Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"),
Self::MachOAarch64TlsAdrPage21 => write!(f, "MachOAarch64TlsAdrPage21"),
Self::MachOAarch64TlsAdrPageOff12 => write!(f, "MachOAarch64TlsAdrPageOff12"),
Self::Aarch64TlsGdAdrPage21 => write!(f, "Aarch64TlsGdAdrPage21"), Self::Aarch64TlsGdAdrPage21 => write!(f, "Aarch64TlsGdAdrPage21"),
Self::Aarch64TlsGdAddLo12Nc => write!(f, "Aarch64TlsGdAddLo12Nc"), Self::Aarch64TlsGdAddLo12Nc => write!(f, "Aarch64TlsGdAddLo12Nc"),
Self::Aarch64AdrGotPage21 => write!(f, "Aarch64AdrGotPage21"), Self::Aarch64AdrGotPage21 => write!(f, "Aarch64AdrGotPage21"),

View File

@@ -927,6 +927,10 @@
(symbol ExternalName) (symbol ExternalName)
(rd WritableReg)) (rd WritableReg))
;; Mach-O TLS address lookup pseudo-instruction: computes the address of the
;; thread-local variable named by `symbol` and leaves it in `rd`.
(MachOTlsGetAddr
(symbol ExternalName)
(rd WritableReg))
;; An unwind pseudo-instruction. ;; An unwind pseudo-instruction.
(Unwind (Unwind
(inst UnwindInst)) (inst UnwindInst))
@@ -3591,6 +3595,12 @@
(_ Unit (emit (MInst.ElfTlsGetAddr name dst)))) (_ Unit (emit (MInst.ElfTlsGetAddr name dst))))
dst)) dst))
;; Helper for emitting `MInst.MachOTlsGetAddr` instructions.
;;
;; Allocates a fresh `$I64` temporary, emits the pseudo-instruction with
;; `name` as its symbol and the temporary as its destination, and returns
;; that temporary.
(decl macho_tls_get_addr (ExternalName) Reg)
(rule (macho_tls_get_addr name)
(let ((dst WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.MachOTlsGetAddr name dst))))
dst))
;; A tuple of `ProducesFlags` and `IntCC`. ;; A tuple of `ProducesFlags` and `IntCC`.
(type FlagsAndCC (enum (FlagsAndCC (flags ProducesFlags) (type FlagsAndCC (enum (FlagsAndCC (flags ProducesFlags)
(cc IntCC)))) (cc IntCC))))

View File

@@ -3458,6 +3458,54 @@ impl MachInstEmit for Inst {
sink.put4(0xd503201f); sink.put4(0xd503201f);
} }
&Inst::MachOTlsGetAddr { ref symbol, rd } => {
// Each thread-local variable gets a descriptor, where the first xword of the descriptor is a pointer
// to a function that takes the descriptor address in x0; after the function returns, x0
// contains the address of the thread-local variable.
//
// What we want to emit is basically:
//
// adrp x0, <label>@TLVPPAGE ; Load the address of the page of the thread local variable pointer (TLVP)
// ldr x0, [x0, <label>@TLVPPAGEOFF] ; Load the descriptor's address into x0
// ldr x1, [x0] ; Load the function pointer (the first part of the descriptor)
// blr x1 ; Call the function pointer with the descriptor address in x0
// ; x0 now contains the TLV address
let rd = allocs.next_writable(rd);
// The two hand-encoded words below hard-code x0 as both base and destination,
// so the destination must have been allocated to x0.
assert_eq!(xreg(0), rd.to_reg());
// x1 is used as the scratch register for the function pointer.
// NOTE(review): this relies on x1 being part of the clobber set declared for
// this instruction during operand collection -- confirm if that set changes.
let rtmp = writable_xreg(1);
// adrp x0, <label>@TLVPPAGE
// The relocation is recorded first, then the instruction word with a zero
// immediate for the relocation to fill in.
sink.add_reloc(Reloc::MachOAarch64TlsAdrPage21, symbol, 0);
sink.put4(0x90000000);
// ldr x0, [x0, <label>@TLVPPAGEOFF]
// Same pattern: relocation first, then the word with a zero offset.
sink.add_reloc(Reloc::MachOAarch64TlsAdrPageOff12, symbol, 0);
sink.put4(0xf9400000);
// Load [x0] (the first xword of the descriptor, i.e. the function pointer)
// into the temp register.
// NOTE(review): `&[]` is passed as the allocation list, presumably because
// both registers are already physical here -- confirm.
Inst::ULoad64 {
rd: rtmp,
mem: AMode::reg(rd.to_reg()),
flags: MemFlags::trusted(),
}
.emit(&[], sink, emit_info, state);
// Call the function pointer in the temp register.
// `uses`, `defs` and `clobbers` are left empty here.
// NOTE(review): presumably this register-allocation metadata is not needed
// at emission time -- confirm.
Inst::CallInd {
info: crate::isa::Box::new(CallIndInfo {
rn: rtmp.to_reg(),
uses: smallvec![],
defs: smallvec![],
clobbers: PRegSet::empty(),
opcode: Opcode::CallIndirect,
caller_callconv: CallConv::AppleAarch64,
callee_callconv: CallConv::AppleAarch64,
}),
}
.emit(&[], sink, emit_info, state);
}
&Inst::Unwind { ref inst } => { &Inst::Unwind { ref inst } => {
sink.add_unwind(inst.clone()); sink.add_unwind(inst.clone());
} }

View File

@@ -907,6 +907,13 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
clobbers.remove(regs::xreg_preg(0)); clobbers.remove(regs::xreg_preg(0));
collector.reg_clobbers(clobbers); collector.reg_clobbers(clobbers);
} }
&Inst::MachOTlsGetAddr { rd, .. } => {
    // The result of the TLS lookup is always produced in x0.
    collector.reg_fixed_def(rd, regs::xreg(0));
    // Start from the set of registers a call under the Apple aarch64
    // convention clobbers, then drop x0, which is already reported as the
    // fixed def above.
    let mut call_clobbers =
        AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::AppleAarch64);
    let result_preg = regs::xreg_preg(0);
    call_clobbers.remove(result_preg);
    collector.reg_clobbers(call_clobbers);
}
&Inst::Unwind { .. } => {} &Inst::Unwind { .. } => {}
&Inst::EmitIsland { .. } => {} &Inst::EmitIsland { .. } => {}
&Inst::DummyUse { reg } => { &Inst::DummyUse { reg } => {
@@ -2701,6 +2708,10 @@ impl Inst {
let rd = pretty_print_reg(rd.to_reg(), allocs); let rd = pretty_print_reg(rd.to_reg(), allocs);
format!("elf_tls_get_addr {}, {}", rd, symbol.display(None)) format!("elf_tls_get_addr {}, {}", rd, symbol.display(None))
} }
&Inst::MachOTlsGetAddr { ref symbol, rd } => {
    // Rendered as `macho_tls_get_addr <dst>, <symbol>`.
    let dst = pretty_print_reg(rd.to_reg(), allocs);
    let sym = symbol.display(None);
    format!("macho_tls_get_addr {}, {}", dst, sym)
}
&Inst::Unwind { ref inst } => { &Inst::Unwind { ref inst } => {
format!("unwind {:?}", inst) format!("unwind {:?}", inst)
} }

View File

@@ -2574,10 +2574,12 @@
;;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (tls_value (symbol_value_data name _ _))) (rule (lower (has_type (tls_model (TlsModel.ElfGd)) (tls_value (symbol_value_data name _ _))))
(if (tls_model_is_elf_gd))
(elf_tls_get_addr name)) (elf_tls_get_addr name))
;; Mach-O TLS model: lower `tls_value` of a symbol through the
;; `macho_tls_get_addr` helper.
(rule (lower (has_type (tls_model (TlsModel.Macho)) (tls_value (symbol_value_data name _ _))))
(macho_tls_get_addr name))
;;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Rules for `fcvt_low_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F64X2 (fcvt_low_from_sint val))) (rule (lower (has_type $F64X2 (fcvt_low_from_sint val)))

View File

@@ -0,0 +1,58 @@
test compile precise-output
set tls_model=macho
target aarch64
function u0:0(i32) -> i32, i64 {
gv0 = symbol colocated tls u1:0
block0(v0: i32):
v1 = global_value.i64 gv0
return v0, v1
}
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; str x24, [sp, #-16]!
; stp d14, d15, [sp, #-16]!
; stp d12, d13, [sp, #-16]!
; stp d10, d11, [sp, #-16]!
; stp d8, d9, [sp, #-16]!
; block0:
; mov x24, x0
; macho_tls_get_addr x0, userextname0
; mov x1, x0
; mov x0, x24
; ldp d8, d9, [sp], #16
; ldp d10, d11, [sp], #16
; ldp d12, d13, [sp], #16
; ldp d14, d15, [sp], #16
; ldr x24, [sp], #16
; ldp fp, lr, [sp], #16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; str x24, [sp, #-0x10]!
; stp d14, d15, [sp, #-0x10]!
; stp d12, d13, [sp, #-0x10]!
; stp d10, d11, [sp, #-0x10]!
; stp d8, d9, [sp, #-0x10]!
; block1: ; offset 0x1c
; mov x24, x0
; adrp x0, #0 ; reloc_external MachOAarch64TlsAdrPage21 u1:0 0
; ldr x0, [x0] ; reloc_external MachOAarch64TlsAdrPageOff12 u1:0 0
; ldr x1, [x0]
; blr x1
; mov x1, x0
; mov x0, x24
; ldp d8, d9, [sp], #0x10
; ldp d10, d11, [sp], #0x10
; ldp d12, d13, [sp], #0x10
; ldp d14, d15, [sp], #0x10
; ldr x24, [sp], #0x10
; ldp x29, x30, [sp], #0x10
; ret

View File

@@ -654,6 +654,36 @@ impl ObjectModule {
32, 32,
) )
} }
Reloc::MachOAarch64TlsAdrPage21 => {
    // This relocation is only accepted when the object being written is Mach-O.
    let format = self.object.format();
    assert_eq!(
        format,
        object::BinaryFormat::MachO,
        "MachOAarch64TlsAdrPage21 is not supported for this file format"
    );
    // Maps to the Mach-O TLVP "load page" relocation, marked relative.
    let kind = RelocationKind::MachO {
        value: object::macho::ARM64_RELOC_TLVP_LOAD_PAGE21,
        relative: true,
    };
    // NOTE(review): the trailing 21 is presumably the relocation field
    // width in bits -- confirm against the tuple's consumer.
    (kind, RelocationEncoding::Generic, 21)
}
Reloc::MachOAarch64TlsAdrPageOff12 => {
    // This relocation is only accepted when the object being written is Mach-O.
    let format = self.object.format();
    assert_eq!(
        format,
        object::BinaryFormat::MachO,
        "MachOAarch64TlsAdrPageOff12 is not supported for this file format"
    );
    // Maps to the Mach-O TLVP "load page offset" relocation, marked
    // non-relative.
    let kind = RelocationKind::MachO {
        value: object::macho::ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
        relative: false,
    };
    // NOTE(review): the trailing 12 is presumably the relocation field
    // width in bits -- confirm against the tuple's consumer.
    (kind, RelocationEncoding::Generic, 12)
}
Reloc::Aarch64TlsGdAdrPage21 => { Reloc::Aarch64TlsGdAdrPage21 => {
assert_eq!( assert_eq!(
self.object.format(), self.object.format(),