Intel encodings for nearest/floor/ceil/trunc.
These floating point rounding operations all use the roundss/roundsd instructions that are available in SSE 4.1.
This commit is contained in:
@@ -38,6 +38,7 @@ def gen_recipe(recipe, fmt):
|
||||
with fmt.indented(
|
||||
'if let InstructionData::{} {{'.format(iform.name),
|
||||
'}'):
|
||||
fmt.line('opcode,')
|
||||
for f in iform.imm_fields:
|
||||
fmt.line('{},'.format(f.member))
|
||||
if want_args:
|
||||
|
||||
@@ -11,9 +11,10 @@ from . import settings as cfg
|
||||
from . import instructions as x86
|
||||
from .legalize import intel_expand
|
||||
from base.legalize import narrow, expand
|
||||
from .settings import use_sse41
|
||||
|
||||
try:
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING, Any # noqa
|
||||
if TYPE_CHECKING:
|
||||
from cdsl.instructions import MaybeBoundInst # noqa
|
||||
except ImportError:
|
||||
@@ -82,7 +83,7 @@ def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
|
||||
|
||||
|
||||
def enc_flt(inst, recipe, *args, **kwargs):
|
||||
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
|
||||
# type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
|
||||
"""
|
||||
Add encodings for floating point instruction `inst` to both I32 and I64.
|
||||
"""
|
||||
@@ -363,6 +364,16 @@ enc_flt(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
|
||||
enc_flt(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
|
||||
enc_flt(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)
|
||||
|
||||
# Rounding. The recipe looks at the opcode to pick an immediate.
|
||||
for inst in [
|
||||
base.nearest,
|
||||
base.floor,
|
||||
base.ceil,
|
||||
base.trunc]:
|
||||
enc_flt(inst.f32, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0a, isap=use_sse41)
|
||||
enc_flt(inst.f64, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0b, isap=use_sse41)
|
||||
|
||||
|
||||
# Binary arithmetic ops.
|
||||
for inst, opc in [
|
||||
(base.fadd, 0x58),
|
||||
|
||||
@@ -289,6 +289,21 @@ frurm = TailRecipe(
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
''')
|
||||
|
||||
# XX /r, RMI form for one of the roundXX SSE 4.1 instructions.
|
||||
furmi_rnd = TailRecipe(
|
||||
'furmi_rnd', Unary, size=2, ins=FPR, outs=FPR,
|
||||
emit='''
|
||||
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
|
||||
modrm_rr(in_reg0, out_reg0, sink);
|
||||
sink.put1(match opcode {
|
||||
Opcode::Nearest => 0b00,
|
||||
Opcode::Floor => 0b01,
|
||||
Opcode::Ceil => 0b10,
|
||||
Opcode::Trunc => 0b11,
|
||||
x => panic!("{} unexpected for furmi_rnd", opcode),
|
||||
});
|
||||
''')
|
||||
|
||||
# XX /r, for regmove instructions.
|
||||
rmov = TailRecipe(
|
||||
'ur', RegMove, size=1, ins=GPR, outs=(),
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Emitting binary Intel machine code.
|
||||
|
||||
use binemit::{CodeSink, Reloc, bad_encoding};
|
||||
use ir::{Function, Inst, Ebb, InstructionData};
|
||||
use ir::{Function, Inst, Ebb, InstructionData, Opcode};
|
||||
use isa::{RegUnit, StackRef, StackBase, StackBaseMask};
|
||||
use regalloc::RegDiversions;
|
||||
use super::registers::RU;
|
||||
@@ -41,6 +41,9 @@ fn stk_base(base: StackBase) -> RegUnit {
|
||||
// Mandatory prefix bytes for Mp* opcodes.
|
||||
const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];
|
||||
|
||||
// Second byte for three-byte opcodes for mm=0b10 and mm=0b11.
|
||||
const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];
|
||||
|
||||
// A REX prefix with no bits set: 0b0100WRXB.
|
||||
const BASE_REX: u8 = 0b0100_0000;
|
||||
|
||||
@@ -111,6 +114,15 @@ fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix and REX.
|
||||
fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode (0F XX) with mandatory prefix.
|
||||
fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
|
||||
@@ -131,12 +143,27 @@ fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix and REX.
|
||||
fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*");
|
||||
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
|
||||
fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
|
||||
let mm = (bits >> 10) & 3;
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
|
||||
fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for Mp3*");
|
||||
let pp = (bits >> 8) & 3;
|
||||
sink.put1(PREFIX[(pp - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
let mm = (bits >> 10) & 3;
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(mm - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user