Use movss/movsd rather than movd/movq for floating-point loads and stores.

While there may be CPUs that have a domain crossing penalty here,
this also helps the generated code look more like the code produced
by other compilers.
This commit is contained in:
Dan Gohman
2018-03-27 11:21:37 -07:00
parent 3b0a9b9ecf
commit 79f02e42dd
3 changed files with 172 additions and 172 deletions

View File

@@ -259,31 +259,31 @@ X86_64.enc(base.adjust_sp_imm, *r.adjustsp32.rex(0x81, w=1))
# Float loads and stores.
#
enc_both(base.load.f32.any, r.fld, 0x66, 0x0f, 0x6e)
enc_both(base.load.f32.any, r.fldDisp8, 0x66, 0x0f, 0x6e)
enc_both(base.load.f32.any, r.fldDisp32, 0x66, 0x0f, 0x6e)
enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fld, 0xf3, 0x0f, 0x7e)
enc_both(base.load.f64.any, r.fldDisp8, 0xf3, 0x0f, 0x7e)
enc_both(base.load.f64.any, r.fldDisp32, 0xf3, 0x0f, 0x7e)
enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.store.f32.any, r.fst, 0x66, 0x0f, 0x7e)
enc_both(base.store.f32.any, r.fstDisp8, 0x66, 0x0f, 0x7e)
enc_both(base.store.f32.any, r.fstDisp32, 0x66, 0x0f, 0x7e)
enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fst, 0x66, 0x0f, 0xd6)
enc_both(base.store.f64.any, r.fstDisp8, 0x66, 0x0f, 0xd6)
enc_both(base.store.f64.any, r.fstDisp32, 0x66, 0x0f, 0xd6)
enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.fill.f32, r.ffillSib32, 0x66, 0x0f, 0x6e)
enc_both(base.regfill.f32, r.fregfill32, 0x66, 0x0f, 0x6e)
enc_both(base.fill.f64, r.ffillSib32, 0xf3, 0x0f, 0x7e)
enc_both(base.regfill.f64, r.fregfill32, 0xf3, 0x0f, 0x7e)
enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)
enc_both(base.regfill.f64, r.fregfill32, 0xf2, 0x0f, 0x10)
enc_both(base.spill.f32, r.fspillSib32, 0x66, 0x0f, 0x7e)
enc_both(base.regspill.f32, r.fregspill32, 0x66, 0x0f, 0x7e)
enc_both(base.spill.f64, r.fspillSib32, 0x66, 0x0f, 0xd6)
enc_both(base.regspill.f64, r.fregspill32, 0x66, 0x0f, 0xd6)
enc_both(base.spill.f32, r.fspillSib32, 0xf3, 0x0f, 0x11)
enc_both(base.regspill.f32, r.fregspill32, 0xf3, 0x0f, 0x11)
enc_both(base.spill.f64, r.fspillSib32, 0xf2, 0x0f, 0x11)
enc_both(base.regspill.f64, r.fregspill32, 0xf2, 0x0f, 0x11)
#
# Function addresses.