Add spill/fill encodings for Intel ISAs.

To begin with, these are catch-all encodings with a SIB byte and a
32-bit displacement, so they can access any stack slot via both the
stack pointer and the frame pointer.

In the future, we will add encodings for 8-bit displacements as well as
EBP-relative references without a SIB byte.
This commit is contained in:
Jakob Stoklund Olesen
2017-09-22 15:35:11 -07:00
parent 76eb7df9f0
commit 29dfcf5dfb
8 changed files with 217 additions and 6 deletions

View File

@@ -8,6 +8,11 @@ isa intel has_sse2
;
function %F32() {
ss0 = incoming_arg 8, offset 0
ss1 = incoming_arg 1024, offset -1024
ss2 = incoming_arg 1024, offset -2048
ss3 = incoming_arg 8, offset -2056
ebb0:
[-,%rcx] v0 = iconst.i32 1
[-,%rsi] v1 = iconst.i32 2
@@ -105,10 +110,27 @@ ebb0:
; asm: movd %xmm2, -10000(%esi)
[-] store.f32 v101, v1-10000 ; bin: 66 0f 7e 96 ffffd8f0
; Spill / Fill.
; asm: movd %xmm5, 1032(%esp)
[-,ss1] v200 = spill v100 ; bin: 66 0f 7e ac 24 00000408
; asm: movd %xmm2, 1032(%esp)
[-,ss1] v201 = spill v101 ; bin: 66 0f 7e 94 24 00000408
; asm: movd 1032(%esp), %xmm5
[-,%xmm5] v210 = fill v200 ; bin: 66 0f 6e ac 24 00000408
; asm: movd 1032(%esp), %xmm2
[-,%xmm2] v211 = fill v201 ; bin: 66 0f 6e 94 24 00000408
return
}
function %F64() {
ss0 = incoming_arg 8, offset 0
ss1 = incoming_arg 1024, offset -1024
ss2 = incoming_arg 1024, offset -2048
ss3 = incoming_arg 8, offset -2056
ebb0:
[-,%rcx] v0 = iconst.i32 1
[-,%rsi] v1 = iconst.i32 2
@@ -198,5 +220,17 @@ ebb0:
; asm: movq %xmm2, -10000(%esi)
[-] store.f64 v101, v1-10000 ; bin: 66 0f d6 96 ffffd8f0
; Spill / Fill.
; asm: movq %xmm5, 1032(%esp)
[-,ss1] v200 = spill v100 ; bin: 66 0f d6 ac 24 00000408
; asm: movq %xmm2, 1032(%esp)
[-,ss1] v201 = spill v101 ; bin: 66 0f d6 94 24 00000408
; asm: movq 1032(%esp), %xmm5
[-,%xmm5] v210 = fill v200 ; bin: f3 0f 7e ac 24 00000408
; asm: movq 1032(%esp), %xmm2
[-,%xmm2] v211 = fill v201 ; bin: f3 0f 7e 94 24 00000408
return
}

View File

@@ -11,6 +11,11 @@ function %I32() {
fn0 = function %foo()
sig0 = ()
ss0 = incoming_arg 8, offset 0
ss1 = incoming_arg 1024, offset -1024
ss2 = incoming_arg 1024, offset -2048
ss3 = incoming_arg 8, offset -2056
ebb0:
; asm: movl $1, %ecx
[-,%rcx] v1 = iconst.i32 1 ; bin: b9 00000001
@@ -346,6 +351,18 @@ ebb0:
; asm: call *%esi
call_indirect sig0, v401() ; bin: ff d6
; Spill / Fill.
; asm: movl %ecx, 1032(%esp)
[-,ss1] v500 = spill v1 ; bin: 89 8c 24 00000408
; asm: movl %esi, 1032(%esp)
[-,ss1] v501 = spill v2 ; bin: 89 b4 24 00000408
; asm: movl 1032(%esp), %ecx
[-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408
; asm: movl 1032(%esp), %esi
[-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408
; asm: testl %ecx, %ecx
; asm: je ebb1
brz v1, ebb1 ; bin: 85 c9 74 0e

View File

@@ -1,6 +1,7 @@
; Binary emission of 64-bit floating point code.
test binemit
set is_64bit
set is_compressed
isa intel has_sse2
; The binary encodings can be verified with the command:
@@ -9,6 +10,11 @@ isa intel has_sse2
;
function %F32() {
ss0 = incoming_arg 8, offset 0
ss1 = incoming_arg 1024, offset -1024
ss2 = incoming_arg 1024, offset -2048
ss3 = incoming_arg 8, offset -2056
ebb0:
[-,%r11] v0 = iconst.i32 1
[-,%rsi] v1 = iconst.i32 2
@@ -36,7 +42,7 @@ ebb0:
[-,%xmm10] v17 = bitcast.f32 v1 ; bin: 66 44 0f 6e d6
; asm: movd %xmm5, %ecx
[-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 40 0f 7e e9
[-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 0f 7e e9
; asm: movd %xmm10, %esi
[-,%rsi] v19 = bitcast.i32 v11 ; bin: 66 44 0f 7e d6
@@ -113,10 +119,27 @@ ebb0:
; asm: movd %xmm10, -10000(%rax)
[-] store.f32 v101, v2-10000 ; bin: 66 44 0f 7e 90 ffffd8f0
; Spill / Fill.
; asm: movd %xmm5, 1032(%rsp)
[-,ss1] v200 = spill v100 ; bin: 66 0f 7e ac 24 00000408
; asm: movd %xmm10, 1032(%rsp)
[-,ss1] v201 = spill v101 ; bin: 66 44 0f 7e 94 24 00000408
; asm: movd 1032(%rsp), %xmm5
[-,%xmm5] v210 = fill v200 ; bin: 66 0f 6e ac 24 00000408
; asm: movd 1032(%rsp), %xmm10
[-,%xmm10] v211 = fill v201 ; bin: 66 44 0f 6e 94 24 00000408
return
}
function %F64() {
ss0 = incoming_arg 8, offset 0
ss1 = incoming_arg 1024, offset -1024
ss2 = incoming_arg 1024, offset -2048
ss3 = incoming_arg 8, offset -2056
ebb0:
[-,%r11] v0 = iconst.i32 1
[-,%rsi] v1 = iconst.i32 2
@@ -221,5 +244,17 @@ ebb0:
; asm: movq %xmm10, -10000(%rax)
[-] store.f64 v101, v2-10000 ; bin: 66 44 0f d6 90 ffffd8f0
; Spill / Fill.
; asm: movq %xmm5, 1032(%rsp)
[-,ss1] v200 = spill v100 ; bin: 66 0f d6 ac 24 00000408
; asm: movq %xmm10, 1032(%rsp)
[-,ss1] v201 = spill v101 ; bin: 66 44 0f d6 94 24 00000408
; asm: movq 1032(%rsp), %xmm5
[-,%xmm5] v210 = fill v200 ; bin: f3 0f 7e ac 24 00000408
; asm: movq 1032(%rsp), %xmm10
[-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 7e 94 24 00000408
return
}

View File

@@ -14,6 +14,13 @@ function %I64() {
fn0 = function %foo()
sig0 = ()
; Use incoming_arg stack slots because they won't be relocated by the frame
; layout.
ss0 = incoming_arg 8, offset 0
ss1 = incoming_arg 1024, offset -1024
ss2 = incoming_arg 1024, offset -2048
ss3 = incoming_arg 8, offset -2056
ebb0:
; Integer Constants.
@@ -436,6 +443,22 @@ ebb0:
; asm: call *%r10
call_indirect sig0, v402() ; bin: 41 ff d2
; Spill / Fill.
; asm: movq %rcx, 1032(%rsp)
[-,ss1] v500 = spill v1 ; bin: 48 89 8c 24 00000408
; asm: movq %rsi, 1032(%rsp)
[-,ss1] v501 = spill v2 ; bin: 48 89 b4 24 00000408
; asm: movq %r10, 1032(%rsp)
[-,ss1] v502 = spill v3 ; bin: 4c 89 94 24 00000408
; asm: movq 1032(%rsp), %rcx
[-,%rcx] v510 = fill v500 ; bin: 48 8b 8c 24 00000408
; asm: movq 1032(%rsp), %rsi
[-,%rsi] v511 = fill v501 ; bin: 48 8b b4 24 00000408
; asm: movq 1032(%rsp), %r10
[-,%r10] v512 = fill v502 ; bin: 4c 8b 94 24 00000408
; asm: testq %rcx, %rcx
; asm: je ebb1
brz v1, ebb1 ; bin: 48 85 c9 74 1b
@@ -477,6 +500,11 @@ function %I32() {
fn0 = function %foo()
sig0 = ()
ss0 = incoming_arg 8, offset 0
ss1 = incoming_arg 1024, offset -1024
ss2 = incoming_arg 1024, offset -2048
ss3 = incoming_arg 8, offset -2056
ebb0:
; Integer Constants.
@@ -806,6 +834,22 @@ ebb0:
; asm: movzbl %dl, %esi
[-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2
; Spill / Fill.
; asm: movl %ecx, 1032(%rsp)
[-,ss1] v500 = spill v1 ; bin: 89 8c 24 00000408
; asm: movl %esi, 1032(%rsp)
[-,ss1] v501 = spill v2 ; bin: 89 b4 24 00000408
; asm: movl %r10d, 1032(%rsp)
[-,ss1] v502 = spill v3 ; bin: 44 89 94 24 00000408
; asm: movl 1032(%rsp), %ecx
[-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408
; asm: movl 1032(%rsp), %esi
[-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408
; asm: movl 1032(%rsp), %r10d
[-,%r10] v512 = fill v502 ; bin: 44 8b 94 24 00000408
; asm: testl %ecx, %ecx
; asm: je ebb1x
brz v1, ebb1 ; bin: 85 c9 74 18

View File

@@ -65,7 +65,7 @@ def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
Add encodings for `inst.i32` to I32.
Add encodings for `inst.i32` to I64 with and without REX.
Add encodings for `inst.i64` to I64 with a REX prefix, using the `w_bit`
argument to determine wheter or not to set the REX.W bit.
argument to determine whether or not to set the REX.W bit.
"""
I32.enc(inst.i32.any, *recipe(*args, **kwargs))
@@ -181,6 +181,8 @@ enc_i32_i64_ld_st(base.store, True, r.st, 0x89)
enc_i32_i64_ld_st(base.store, True, r.stDisp8, 0x89)
enc_i32_i64_ld_st(base.store, True, r.stDisp32, 0x89)
enc_i32_i64(base.spill, r.spSib32, 0x89)
enc_i64(base.istore32.i64.any, r.st, 0x89)
enc_i64(base.istore32.i64.any, r.stDisp8, 0x89)
enc_i64(base.istore32.i64.any, r.stDisp32, 0x89)
@@ -208,6 +210,8 @@ enc_i32_i64_ld_st(base.load, True, r.ld, 0x8b)
enc_i32_i64_ld_st(base.load, True, r.ldDisp8, 0x8b)
enc_i32_i64_ld_st(base.load, True, r.ldDisp32, 0x8b)
enc_i32_i64(base.fill, r.fiSib32, 0x8b)
enc_i64(base.uload32.i64, r.ld, 0x8b)
enc_i64(base.uload32.i64, r.ldDisp8, 0x8b)
enc_i64(base.uload32.i64, r.ldDisp32, 0x8b)
@@ -252,6 +256,12 @@ enc_flt(base.store.f64.any, r.fst, 0x66, 0x0f, 0xd6)
enc_flt(base.store.f64.any, r.fstDisp8, 0x66, 0x0f, 0xd6)
enc_flt(base.store.f64.any, r.fstDisp32, 0x66, 0x0f, 0xd6)
enc_flt(base.fill.f32, r.ffiSib32, 0x66, 0x0f, 0x6e)
enc_flt(base.fill.f64, r.ffiSib32, 0xf3, 0x0f, 0x7e)
enc_flt(base.spill.f32, r.fspSib32, 0x66, 0x0f, 0x7e)
enc_flt(base.spill.f64, r.fspSib32, 0x66, 0x0f, 0xd6)
#
# Function addresses.
#

View File

@@ -8,7 +8,7 @@ from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry
from base.formats import Trap, Call, IndirectCall, Store, Load
from base.formats import IntCompare
from base.formats import RegMove, Ternary, Jump, Branch, FuncAddr
from .registers import GPR, ABCD, FPR, GPR8, FPR8
from .registers import GPR, ABCD, FPR, GPR8, FPR8, StackGPR32, StackFPR32
try:
from typing import Tuple, Dict, Sequence # noqa
@@ -474,6 +474,26 @@ fstDisp32 = TailRecipe(
sink.put4(offset as u32);
''')
# Unary spill with SIB and 32-bit displacement.
spSib32 = TailRecipe(
'spSib32', Unary, size=6, ins=GPR, outs=StackGPR32,
emit='''
let base = stk_base(out_stk0.base);
PUT_OP(bits, rex2(base, in_reg0), sink);
modrm_sib_disp32(in_reg0, sink);
sib_noindex(base, sink);
sink.put4(out_stk0.offset as u32);
''')
fspSib32 = TailRecipe(
'fspSib32', Unary, size=6, ins=FPR, outs=StackFPR32,
emit='''
let base = stk_base(out_stk0.base);
PUT_OP(bits, rex2(base, in_reg0), sink);
modrm_sib_disp32(in_reg0, sink);
sib_noindex(base, sink);
sink.put4(out_stk0.offset as u32);
''')
#
# Load recipes
#
@@ -540,6 +560,26 @@ fldDisp32 = TailRecipe(
sink.put4(offset as u32);
''')
# Unary fill with SIB and 32-bit displacement.
fiSib32 = TailRecipe(
'fiSib32', Unary, size=6, ins=StackGPR32, outs=GPR,
emit='''
let base = stk_base(in_stk0.base);
PUT_OP(bits, rex2(base, out_reg0), sink);
modrm_sib_disp32(out_reg0, sink);
sib_noindex(base, sink);
sink.put4(in_stk0.offset as u32);
''')
ffiSib32 = TailRecipe(
'ffiSib32', Unary, size=6, ins=StackFPR32, outs=FPR,
emit='''
let base = stk_base(in_stk0.base);
PUT_OP(bits, rex2(base, out_reg0), sink);
modrm_sib_disp32(out_reg0, sink);
sib_noindex(base, sink);
sink.put4(in_stk0.offset as u32);
''')
#
# Call/return
#

View File

@@ -23,7 +23,7 @@ data types, and the H-registers even less so. Rather than trying to model the
H-registers accurately, we'll avoid using them in both I32 and I64 modes.
"""
from __future__ import absolute_import
from cdsl.registers import RegBank, RegClass
from cdsl.registers import RegBank, RegClass, Stack
from .defs import ISA
@@ -44,4 +44,10 @@ ABCD = GPR[0:4]
FPR = RegClass(FloatRegs)
FPR8 = FPR[0:8]
# Constraints for stack operands.
# Stack operand with a 32-bit signed displacement from either RBP or RSP.
StackGPR32 = Stack(GPR)
StackFPR32 = Stack(FPR)
RegClass.extract_names(globals())

View File

@@ -2,8 +2,9 @@
use binemit::{CodeSink, Reloc, bad_encoding};
use ir::{Function, Inst, Ebb, InstructionData};
use isa::RegUnit;
use isa::{RegUnit, StackRef, StackBase, StackBaseMask};
use regalloc::RegDiversions;
use super::registers::RU;
include!(concat!(env!("OUT_DIR"), "/binemit-intel.rs"));
@@ -27,6 +28,16 @@ impl Into<Reloc> for RelocKind {
}
}
// Convert a stack base to the corresponding register.
fn stk_base(base: StackBase) -> RegUnit {
let ru = match base {
StackBase::SP => RU::rsp,
StackBase::FP => RU::rbp,
StackBase::Zone => unimplemented!(),
};
ru as RegUnit
}
// Mandatory prefix bytes for Mp* opcodes.
const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];
@@ -43,7 +54,7 @@ fn rex1(reg_b: RegUnit) -> u8 {
// Create a dual-register REX prefix, setting:
//
// REX.B = bit 3 of r/m register.
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
// REX.R = bit 3 of reg register.
fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
let b = ((rm >> 3) & 1) as u8;
@@ -185,6 +196,20 @@ fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS)
sink.put1(b);
}
/// Emit a mode 10 ModR/M byte indicating that a SIB byte is present.
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
modrm_disp32(0b100, reg, sink);
}
/// Emit a SIB byte with a base register and no scale+index.
fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
let base = base as u8 & 7;
// SIB SS_III_BBB.
let mut b = 0b00_100_000;
b |= base;
sink.put1(b);
}
/// Emit a single-byte branch displacement to `destination`.
fn disp1<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) {
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);