diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 9e6918d011..0498875f2e 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -95,6 +95,7 @@ impl TargetIsa for AArch64Backend { dynamic_stackslot_offsets, bb_starts: emit_result.bb_offsets, bb_edges: emit_result.bb_edges, + alignment: emit_result.alignment, }) } @@ -179,6 +180,12 @@ impl TargetIsa for AArch64Backend { fn map_regalloc_reg_to_dwarf(&self, reg: Reg) -> Result { inst::unwind::systemv::map_reg(reg).map(|reg| reg.0) } + + fn function_alignment(&self) -> u32 { + // We use 32-byte alignment for performance reasons, but for correctness we would only need + // 4-byte alignment. + 32 + } } impl fmt::Display for AArch64Backend { diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index b9496a14a4..98364bb22f 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -277,6 +277,9 @@ pub trait TargetIsa: fmt::Display + Send + Sync { /// will be "labeled" or might have calls between them, typically the number /// of defined functions in the object file. fn text_section_builder(&self, num_labeled_funcs: u32) -> Box; + + /// The function alignment required by this ISA. + fn function_alignment(&self) -> u32; } /// Methods implemented for free for target ISA! diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index 25886a8a0a..877a7374f0 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -93,6 +93,7 @@ impl TargetIsa for S390xBackend { dynamic_stackslot_offsets, bb_starts: emit_result.bb_offsets, bb_edges: emit_result.bb_edges, + alignment: emit_result.alignment, }) } @@ -161,6 +162,10 @@ impl TargetIsa for S390xBackend { fn text_section_builder(&self, num_funcs: u32) -> Box { Box::new(MachTextSectionBuilder::::new(num_funcs)) } + + fn function_alignment(&self) -> u32 { + 4 + } } impl fmt::Display for S390xBackend { diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index b57b6f7dac..ddb3523578 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -88,6 +88,7 @@ impl TargetIsa for X64Backend { dynamic_stackslot_offsets, bb_starts: emit_result.bb_offsets, bb_edges: emit_result.bb_edges, + alignment: emit_result.alignment, }) } @@ -158,6 +159,12 @@ impl TargetIsa for X64Backend { fn text_section_builder(&self, num_funcs: u32) -> Box { Box::new(MachTextSectionBuilder::::new(num_funcs)) } + + /// Align functions on x86 to 16 bytes, ensuring that rip-relative loads to SSE registers are + /// always from aligned memory. + fn function_alignment(&self) -> u32 { + 16 + } } impl fmt::Display for X64Backend { diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 226feaf899..c4eb5dd2da 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -468,7 +468,11 @@ impl MachBuffer { /// Align up to the given alignment. pub fn align_to(&mut self, align_to: CodeOffset) { trace!("MachBuffer: align to {}", align_to); - assert!(align_to.is_power_of_two()); + assert!( + align_to.is_power_of_two(), + "{} is not a power of two", + align_to + ); while self.cur_offset() & (align_to - 1) != 0 { self.put1(0); } @@ -1620,7 +1624,7 @@ impl MachTextSectionBuilder { } impl TextSectionBuilder for MachTextSectionBuilder { - fn append(&mut self, named: bool, func: &[u8], align: Option) -> u64 { + fn append(&mut self, named: bool, func: &[u8], align: u32) -> u64 { // Conditionally emit an island if it's necessary to resolve jumps // between functions which are too far away. let size = func.len() as u32; @@ -1628,7 +1632,7 @@ impl TextSectionBuilder for MachTextSectionBuilder { self.buf.emit_island_maybe_forced(self.force_veneers, size); } - self.buf.align_to(align.unwrap_or(I::LabelUse::ALIGN)); + self.buf.align_to(align); let pos = self.buf.cur_offset(); if named { self.buf diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 00a1741982..a2aa97064f 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -300,6 +300,9 @@ pub struct CompiledCodeBase { /// This info is generated only if the `machine_code_cfg_info` /// flag is set. pub bb_edges: Vec<(CodeOffset, CodeOffset)>, + /// Minimum alignment for the function, derived from the use of any + /// pc-relative loads. + pub alignment: u32, } impl CompiledCodeStencil { @@ -314,6 +317,7 @@ impl CompiledCodeStencil { dynamic_stackslot_offsets: self.dynamic_stackslot_offsets, bb_starts: self.bb_starts, bb_edges: self.bb_edges, + alignment: self.alignment, } } } @@ -355,7 +359,7 @@ pub trait TextSectionBuilder { /// /// This function returns the offset at which the data was placed in the /// text section. - fn append(&mut self, labeled: bool, data: &[u8], align: Option) -> u64; + fn append(&mut self, labeled: bool, data: &[u8], align: u32) -> u64; /// Attempts to resolve a relocation for this function. /// diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 336284503b..bd458227b2 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -221,6 +221,9 @@ pub struct EmitResult { /// Stack frame size. pub frame_size: u32, + + /// The alignment requirement for pc-relative loads. + pub alignment: u32, } /// A builder for a VCode function body. @@ -1058,7 +1061,10 @@ impl VCode { } // Emit the constants used by the function. + let mut alignment = 1; for (constant, data) in self.constants.iter() { + alignment = data.alignment().max(alignment); + let label = buffer.get_label_for_constant(constant); buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value()); } @@ -1101,6 +1107,7 @@ impl VCode { dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(), value_labels_ranges, frame_size, + alignment, } } diff --git a/cranelift/jit/src/backend.rs b/cranelift/jit/src/backend.rs index a24d723a40..796a4e00c2 100644 --- a/cranelift/jit/src/backend.rs +++ b/cranelift/jit/src/backend.rs @@ -21,7 +21,6 @@ use std::ptr::NonNull; use std::sync::atomic::{AtomicPtr, Ordering}; use target_lexicon::PointerWidth; -const EXECUTABLE_DATA_ALIGNMENT: u64 = 0x10; const WRITABLE_DATA_ALIGNMENT: u64 = 0x8; const READONLY_DATA_ALIGNMENT: u64 = 0x1; @@ -234,7 +233,12 @@ impl JITModule { let plt_entry = self .memory .code - .allocate(std::mem::size_of::<[u8; 16]>(), EXECUTABLE_DATA_ALIGNMENT) + .allocate( + std::mem::size_of::<[u8; 16]>(), + self.isa + .symbol_alignment() + .max(self.isa.function_alignment() as u64), + ) .unwrap() .cast::<[u8; 16]>(); unsafe { @@ -680,16 +684,20 @@ impl Module for JITModule { } // work around borrow-checker to allow reuse of ctx below - let _ = ctx.compile(self.isa())?; + let res = ctx.compile(self.isa())?; + let alignment = res.alignment as u64; let compiled_code = ctx.compiled_code().unwrap(); let code_size = compiled_code.code_info().total_size; let size = code_size as usize; + let align = alignment + .max(self.isa.function_alignment() as u64) + .max(self.isa.symbol_alignment()); let ptr = self .memory .code - .allocate(size, EXECUTABLE_DATA_ALIGNMENT) + .allocate(size, align) .expect("TODO: handle OOM etc."); { @@ -745,6 +753,7 @@ impl Module for JITModule { &mut self, id: FuncId, func: &ir::Function, + alignment: u64, bytes: &[u8], relocs: &[MachReloc], ) -> ModuleResult { @@ -764,10 +773,13 @@ impl Module for JITModule { } let size = bytes.len(); + let align = alignment + .max(self.isa.function_alignment() as u64) + .max(self.isa.symbol_alignment()); let ptr = self .memory .code - .allocate(size, EXECUTABLE_DATA_ALIGNMENT) + .allocate(size, align) .expect("TODO: handle OOM etc."); unsafe { diff --git a/cranelift/module/src/module.rs b/cranelift/module/src/module.rs index 187fed3184..d38f96ec99 100644 --- a/cranelift/module/src/module.rs +++ b/cranelift/module/src/module.rs @@ -640,6 +640,7 @@ pub trait Module { &mut self, func_id: FuncId, func: &ir::Function, + alignment: u64, bytes: &[u8], relocs: &[MachReloc], ) -> ModuleResult; @@ -736,10 +737,11 @@ impl Module for &mut M { &mut self, func_id: FuncId, func: &ir::Function, + alignment: u64, bytes: &[u8], relocs: &[MachReloc], ) -> ModuleResult { - (**self).define_function_bytes(func_id, func, bytes, relocs) + (**self).define_function_bytes(func_id, func, alignment, bytes, relocs) } fn define_data(&mut self, data: DataId, data_ctx: &DataContext) -> ModuleResult<()> { diff --git a/cranelift/object/src/backend.rs b/cranelift/object/src/backend.rs index 845b279986..8f44b2cc28 100644 --- a/cranelift/object/src/backend.rs +++ b/cranelift/object/src/backend.rs @@ -314,11 +314,13 @@ impl Module for ObjectModule { info!("defining function {}: {}", func_id, ctx.func.display()); let mut code: Vec = Vec::new(); - ctx.compile_and_emit(self.isa(), &mut code)?; + let res = ctx.compile_and_emit(self.isa(), &mut code)?; + let alignment = res.alignment as u64; self.define_function_bytes( func_id, &ctx.func, + alignment, &code, ctx.compiled_code().unwrap().buffer.relocs(), ) @@ -328,6 +330,7 @@ impl Module for ObjectModule { &mut self, func_id: FuncId, func: &ir::Function, + alignment: u64, bytes: &[u8], relocs: &[MachReloc], ) -> ModuleResult { @@ -348,7 +351,10 @@ impl Module for ObjectModule { } *defined = true; - let align = std::cmp::max(self.function_alignment, self.isa.symbol_alignment()); + let align = self + .function_alignment + .max(self.isa.symbol_alignment()) + .max(alignment); let (section, offset) = if self.per_function_section { let symbol_name = self.object.symbol(symbol).name.clone(); let (section, offset) = diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 3d6829f1cc..198b0e6303 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -271,7 +271,8 @@ impl wasmtime_environ::Compiler for Compiler { &mut func_env, )?; - let (_, code_buf) = compile_maybe_cached(&mut context, isa, cache_ctx.as_mut())?; + let (code, code_buf) = compile_maybe_cached(&mut context, isa, cache_ctx.as_mut())?; + let alignment = code.alignment; let compiled_code = context.compiled_code().unwrap(); let func_relocs = compiled_code @@ -333,6 +334,7 @@ impl wasmtime_environ::Compiler for Compiler { stack_maps, start: 0, length, + alignment, }, address_map: address_transform, })) diff --git a/crates/cranelift/src/obj.rs b/crates/cranelift/src/obj.rs index 58bb4c6eca..28d7094980 100644 --- a/crates/cranelift/src/obj.rs +++ b/crates/cranelift/src/obj.rs @@ -95,7 +95,11 @@ impl<'a> ModuleTextBuilder<'a> { func: &'a CompiledFunction, ) -> (SymbolId, Range) { let body_len = func.body.len() as u64; - let off = self.text.append(labeled, &func.body, None); + let off = self.text.append( + labeled, + &func.body, + self.isa.function_alignment().max(func.info.alignment), + ); let symbol_id = self.obj.add_symbol(Symbol { name, @@ -198,7 +202,7 @@ impl<'a> ModuleTextBuilder<'a> { if padding == 0 { return; } - self.text.append(false, &vec![0; padding], Some(1)); + self.text.append(false, &vec![0; padding], 1); } /// Indicates that the text section has been written completely and this diff --git a/crates/environ/src/compilation.rs b/crates/environ/src/compilation.rs index 3322ce2003..9fc56e75fa 100644 --- a/crates/environ/src/compilation.rs +++ b/crates/environ/src/compilation.rs @@ -28,6 +28,9 @@ pub struct FunctionInfo { pub start: u64, /// The size of the compiled function, in bytes. pub length: u32, + + /// The alignment requirements of this function, in bytes. + pub alignment: u32, } /// Information about a compiled trampoline which the host can call to enter