diff --git a/cranelift/filetests/isa/x86/stack-addr64.cton b/cranelift/filetests/isa/x86/stack-addr64.cton new file mode 100644 index 0000000000..ded05221db --- /dev/null +++ b/cranelift/filetests/isa/x86/stack-addr64.cton @@ -0,0 +1,45 @@ +; binary emission of stack address instructions on x86-64. +test binemit +set opt_level=fastest +target x86_64 haswell + +; The binary encodings can be verified with the command: +; +; sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr64.cton | llvm-mc -show-encoding -triple=x86_64 +; + +function %stack_addr() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + ss4 = explicit_slot 8, offset 0 + ss5 = explicit_slot 8, offset 1024 + +ebb0: +[-,%rcx] v0 = stack_addr.i64 ss0 ; bin: 48 8d 8c 24 00000808 +[-,%rcx] v1 = stack_addr.i64 ss1 ; bin: 48 8d 8c 24 00000408 +[-,%rcx] v2 = stack_addr.i64 ss2 ; bin: 48 8d 8c 24 00000008 +[-,%rcx] v3 = stack_addr.i64 ss3 ; bin: 48 8d 8c 24 00000000 +[-,%rcx] v4 = stack_addr.i64 ss4 ; bin: 48 8d 8c 24 00000808 +[-,%rcx] v5 = stack_addr.i64 ss5 ; bin: 48 8d 8c 24 00000c08 + +[-,%rcx] v20 = stack_addr.i64 ss4+1 ; bin: 48 8d 8c 24 00000809 +[-,%rcx] v21 = stack_addr.i64 ss4+2 ; bin: 48 8d 8c 24 0000080a +[-,%rcx] v22 = stack_addr.i64 ss4+2048 ; bin: 48 8d 8c 24 00001008 +[-,%rcx] v23 = stack_addr.i64 ss4-4096 ; bin: 48 8d 8c 24 fffff808 + +[-,%r8] v50 = stack_addr.i64 ss0 ; bin: 4c 8d 84 24 00000808 +[-,%r8] v51 = stack_addr.i64 ss1 ; bin: 4c 8d 84 24 00000408 +[-,%r8] v52 = stack_addr.i64 ss2 ; bin: 4c 8d 84 24 00000008 +[-,%r8] v53 = stack_addr.i64 ss3 ; bin: 4c 8d 84 24 00000000 +[-,%r8] v54 = stack_addr.i64 ss4 ; bin: 4c 8d 84 24 00000808 +[-,%r8] v55 = stack_addr.i64 ss5 ; bin: 4c 8d 84 24 00000c08 + +[-,%r8] v70 = stack_addr.i64 ss4+1 ; bin: 4c 8d 84 24 00000809 +[-,%r8] v71 = stack_addr.i64 ss4+2 ; bin: 4c 8d 84 24 0000080a +[-,%r8] v72 = stack_addr.i64 ss4+2048 ; bin: 4c 8d 84 24 00001008 +[-,%r8] v73 = stack_addr.i64 ss4-4096 ; bin: 4c 8d 84 24 fffff808 + + return +} diff --git a/cranelift/filetests/isa/x86/stack-load-store64.cton b/cranelift/filetests/isa/x86/stack-load-store64.cton new file mode 100644 index 0000000000..c1854e623a --- /dev/null +++ b/cranelift/filetests/isa/x86/stack-load-store64.cton @@ -0,0 +1,21 @@ +; legalization of stack load and store instructions on x86-64. +test legalizer +set opt_level=fastest +target x86_64 haswell + +function %stack_load_and_store() { + ss0 = explicit_slot 8, offset 0 + +ebb0: + v0 = stack_load.i64 ss0 + +; check: v1 = stack_addr.i64 ss0 +; check: v0 = load.i64 notrap aligned v1 + + stack_store.i64 v0, ss0 + +; check: v2 = stack_addr.i64 ss0 +; check: store notrap aligned v0, v2 + + return +} diff --git a/lib/codegen/meta/base/legalize.py b/lib/codegen/meta/base/legalize.py index de196cc645..f575418f8d 100644 --- a/lib/codegen/meta/base/legalize.py +++ b/lib/codegen/meta/base/legalize.py @@ -80,6 +80,10 @@ expand.custom_legalize(insts.select, 'expand_select') expand.custom_legalize(insts.f32const, 'expand_fconst') expand.custom_legalize(insts.f64const, 'expand_fconst') +# Custom expansions for stack memory accesses. +expand.custom_legalize(insts.stack_load, 'expand_stack_load') +expand.custom_legalize(insts.stack_store, 'expand_stack_store') + x = Var('x') y = Var('y') a = Var('a') diff --git a/lib/codegen/meta/isa/x86/encodings.py b/lib/codegen/meta/isa/x86/encodings.py index 62dbf1dcf7..28e2f73740 100644 --- a/lib/codegen/meta/isa/x86/encodings.py +++ b/lib/codegen/meta/isa/x86/encodings.py @@ -436,6 +436,15 @@ X86_64.enc(base.globalsym_addr.i64, *r.pcrel_gvaddr8.rex(0x8d, w=1), X86_64.enc(base.globalsym_addr.i64, *r.got_gvaddr8.rex(0x8b, w=1), isap=is_pic) +# +# Stack addresses. +# +# TODO: Add encoding rules for stack_load and stack_store, so that they +# don't get legalized to stack_addr + load/store. +# +X86_32.enc(base.stack_addr.i32, *r.spaddr4_id(0x8d)) +X86_64.enc(base.stack_addr.i64, *r.spaddr8_id.rex(0x8d, w=1)) + # # Call/return # diff --git a/lib/codegen/meta/isa/x86/recipes.py b/lib/codegen/meta/isa/x86/recipes.py index 2d6ac898a4..75c666a8af 100644 --- a/lib/codegen/meta/isa/x86/recipes.py +++ b/lib/codegen/meta/isa/x86/recipes.py @@ -17,6 +17,7 @@ from base.formats import Jump, Branch, BranchInt, BranchFloat from base.formats import Ternary, FuncAddr, UnaryGlobalValue from base.formats import RegMove, RegSpill, RegFill, CopySpecial from base.formats import LoadComplex, StoreComplex +from base.formats import StackLoad from .registers import GPR, ABCD, FPR, GPR_DEREF_SAFE, GPR_ZERO_DEREF_SAFE from .registers import GPR8, FPR8, GPR8_DEREF_SAFE, GPR8_ZERO_DEREF_SAFE, FLAG from .registers import StackGPR32, StackFPR32 @@ -751,6 +752,36 @@ got_gvaddr8 = TailRecipe( sink.put4(0); ''') +# +# Stack addresses. +# +# TODO: Alternative forms for 8-bit immediates, when applicable. +# + +spaddr4_id = TailRecipe( + 'spaddr4_id', StackLoad, size=6, ins=(), outs=GPR, + emit=''' + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + PUT_OP(bits, rex2(out_reg0, base), sink); + modrm_sib_disp8(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + ''') + +spaddr8_id = TailRecipe( + 'spaddr8_id', StackLoad, size=6, ins=(), outs=GPR, + emit=''' + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + PUT_OP(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + ''') + # # Store recipes. diff --git a/lib/codegen/src/legalizer/mod.rs b/lib/codegen/src/legalizer/mod.rs index f099ca7447..f6ca7efeec 100644 --- a/lib/codegen/src/legalizer/mod.rs +++ b/lib/codegen/src/legalizer/mod.rs @@ -16,7 +16,7 @@ use bitset::BitSet; use cursor::{Cursor, FuncCursor}; use flowgraph::ControlFlowGraph; -use ir::{self, InstBuilder}; +use ir::{self, InstBuilder, MemFlags}; use isa::TargetIsa; use timing; @@ -269,3 +269,71 @@ fn expand_fconst( }; pos.func.dfg.replace(inst).bitcast(ty, ival); } + +/// Expand illegal `stack_load` instructions. +fn expand_stack_load( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &TargetIsa, +) { + let ty = func.dfg.value_type(func.dfg.first_result(inst)); + let addr_ty = isa.pointer_type(); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (stack_slot, offset) = match pos.func.dfg[inst] { + ir::InstructionData::StackLoad { + opcode: _opcode, + stack_slot, + offset, + } => (stack_slot, offset), + _ => panic!( + "Expected stack_load: {}", + pos.func.dfg.display_inst(inst, None) + ), + }; + + let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset); + + let mut mflags = MemFlags::new(); + // Stack slots are required to be accessible and aligned. + mflags.set_notrap(); + mflags.set_aligned(); + pos.func.dfg.replace(inst).load(ty, mflags, addr, 0); +} + +/// Expand illegal `stack_store` instructions. +fn expand_stack_store( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &TargetIsa, +) { + let addr_ty = isa.pointer_type(); + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (val, stack_slot, offset) = match pos.func.dfg[inst] { + ir::InstructionData::StackStore { + opcode: _opcode, + arg, + stack_slot, + offset, + } => (arg, stack_slot, offset), + _ => panic!( + "Expected stack_store: {}", + pos.func.dfg.display_inst(inst, None) + ), + }; + + let addr = pos.ins().stack_addr(addr_ty, stack_slot, offset); + + let mut mflags = MemFlags::new(); + // Stack slots are required to be accessible and aligned. + mflags.set_notrap(); + mflags.set_aligned(); + pos.func.dfg.replace(inst).store(mflags, val, addr, 0); +}