Remove the old x86 backend
This commit is contained in:
@@ -71,9 +71,6 @@ arm32 = [] # Work-in-progress codegen backend for ARM.
|
||||
# backend is the default now.
|
||||
experimental_x64 = []
|
||||
|
||||
# Make the old x86 backend the default.
|
||||
old-x86-backend = []
|
||||
|
||||
# Option to enable all architectures.
|
||||
all-arch = [
|
||||
"x86",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,723 +0,0 @@
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use crate::cdsl::instructions::{
|
||||
AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder,
|
||||
};
|
||||
use crate::cdsl::operands::Operand;
|
||||
use crate::cdsl::types::ValueType;
|
||||
use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar};
|
||||
use crate::shared::entities::EntityRefs;
|
||||
use crate::shared::formats::Formats;
|
||||
use crate::shared::immediates::Immediates;
|
||||
use crate::shared::types;
|
||||
|
||||
#[allow(clippy::many_single_char_names)]
|
||||
pub(crate) fn define(
|
||||
mut all_instructions: &mut AllInstructions,
|
||||
formats: &Formats,
|
||||
immediates: &Immediates,
|
||||
entities: &EntityRefs,
|
||||
) -> InstructionGroup {
|
||||
let mut ig = InstructionGroupBuilder::new(&mut all_instructions);
|
||||
|
||||
let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into();
|
||||
|
||||
let iWord = &TypeVar::new(
|
||||
"iWord",
|
||||
"A scalar integer machine word",
|
||||
TypeSetBuilder::new().ints(32..64).build(),
|
||||
);
|
||||
let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator");
|
||||
let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator");
|
||||
let d = &Operand::new("d", iWord).with_doc("Denominator");
|
||||
let q = &Operand::new("q", iWord).with_doc("Quotient");
|
||||
let r = &Operand::new("r", iWord).with_doc("Remainder");
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_udivmodx",
|
||||
r#"
|
||||
Extended unsigned division.
|
||||
|
||||
Concatenate the bits in `nhi` and `nlo` to form the numerator.
|
||||
Interpret the bits as an unsigned number and divide by the unsigned
|
||||
denominator `d`. Trap when `d` is zero or if the quotient is larger
|
||||
than the range of the output.
|
||||
|
||||
Return both quotient and remainder.
|
||||
"#,
|
||||
&formats.ternary,
|
||||
)
|
||||
.operands_in(vec![nlo, nhi, d])
|
||||
.operands_out(vec![q, r])
|
||||
.can_trap(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_sdivmodx",
|
||||
r#"
|
||||
Extended signed division.
|
||||
|
||||
Concatenate the bits in `nhi` and `nlo` to form the numerator.
|
||||
Interpret the bits as a signed number and divide by the signed
|
||||
denominator `d`. Trap when `d` is zero or if the quotient is outside
|
||||
the range of the output.
|
||||
|
||||
Return both quotient and remainder.
|
||||
"#,
|
||||
&formats.ternary,
|
||||
)
|
||||
.operands_in(vec![nlo, nhi, d])
|
||||
.operands_out(vec![q, r])
|
||||
.can_trap(true),
|
||||
);
|
||||
|
||||
let argL = &Operand::new("argL", iWord);
|
||||
let argR = &Operand::new("argR", iWord);
|
||||
let resLo = &Operand::new("resLo", iWord);
|
||||
let resHi = &Operand::new("resHi", iWord);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_umulx",
|
||||
r#"
|
||||
Unsigned integer multiplication, producing a double-length result.
|
||||
|
||||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![argL, argR])
|
||||
.operands_out(vec![resLo, resHi]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_smulx",
|
||||
r#"
|
||||
Signed integer multiplication, producing a double-length result.
|
||||
|
||||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![argL, argR])
|
||||
.operands_out(vec![resLo, resHi]),
|
||||
);
|
||||
|
||||
let Float = &TypeVar::new(
|
||||
"Float",
|
||||
"A scalar or vector floating point number",
|
||||
TypeSetBuilder::new()
|
||||
.floats(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.build(),
|
||||
);
|
||||
let IntTo = &TypeVar::new(
|
||||
"IntTo",
|
||||
"An integer type with the same number of lanes",
|
||||
TypeSetBuilder::new()
|
||||
.ints(32..64)
|
||||
.simd_lanes(Interval::All)
|
||||
.build(),
|
||||
);
|
||||
let x = &Operand::new("x", Float);
|
||||
let a = &Operand::new("a", IntTo);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_cvtt2si",
|
||||
r#"
|
||||
Convert with truncation floating point to signed integer.
|
||||
|
||||
The source floating point operand is converted to a signed integer by
|
||||
rounding towards zero. If the result can't be represented in the output
|
||||
type, returns the smallest signed value the output type can represent.
|
||||
|
||||
This instruction does not trap.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let f32x4 = &TypeVar::new(
|
||||
"f32x4",
|
||||
"A floating point number",
|
||||
TypeSetBuilder::new()
|
||||
.floats(32..32)
|
||||
.simd_lanes(4..4)
|
||||
.build(),
|
||||
);
|
||||
let i32x4 = &TypeVar::new(
|
||||
"i32x4",
|
||||
"An integer type with the same number of lanes",
|
||||
TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(),
|
||||
);
|
||||
let x = &Operand::new("x", i32x4);
|
||||
let a = &Operand::new("a", f32x4);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_vcvtudq2ps",
|
||||
r#"
|
||||
Convert unsigned integer to floating point.
|
||||
|
||||
Convert packed doubleword unsigned integers to packed single-precision floating-point
|
||||
values. This instruction does not trap.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", Float);
|
||||
let a = &Operand::new("a", Float);
|
||||
let y = &Operand::new("y", Float);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_fmin",
|
||||
r#"
|
||||
Floating point minimum with x86 semantics.
|
||||
|
||||
This is equivalent to the C ternary operator `x < y ? x : y` which
|
||||
differs from `fmin` when either operand is NaN or when comparing
|
||||
+0.0 to -0.0.
|
||||
|
||||
When the two operands don't compare as LT, `y` is returned unchanged,
|
||||
even if it is a signalling NaN.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_fmax",
|
||||
r#"
|
||||
Floating point maximum with x86 semantics.
|
||||
|
||||
This is equivalent to the C ternary operator `x > y ? x : y` which
|
||||
differs from `fmax` when either operand is NaN or when comparing
|
||||
+0.0 to -0.0.
|
||||
|
||||
When the two operands don't compare as GT, `y` is returned unchanged,
|
||||
even if it is a signalling NaN.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", iWord);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_push",
|
||||
r#"
|
||||
Pushes a value onto the stack.
|
||||
|
||||
Decrements the stack pointer and stores the specified value on to the top.
|
||||
|
||||
This is polymorphic in i32 and i64. However, it is only implemented for i64
|
||||
in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.other_side_effects(true)
|
||||
.can_store(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pop",
|
||||
r#"
|
||||
Pops a value from the stack.
|
||||
|
||||
Loads a value from the top of the stack and then increments the stack
|
||||
pointer.
|
||||
|
||||
This is polymorphic in i32 and i64. However, it is only implemented for i64
|
||||
in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
"#,
|
||||
&formats.nullary,
|
||||
)
|
||||
.operands_out(vec![x])
|
||||
.other_side_effects(true)
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let y = &Operand::new("y", iWord);
|
||||
let rflags = &Operand::new("rflags", iflags);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_bsr",
|
||||
r#"
|
||||
Bit Scan Reverse -- returns the bit-index of the most significant 1
|
||||
in the word. Result is undefined if the argument is zero. However, it
|
||||
sets the Z flag depending on the argument, so it is at least easy to
|
||||
detect and handle that case.
|
||||
|
||||
This is polymorphic in i32 and i64. It is implemented for both i64 and
|
||||
i32 in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![y, rflags]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_bsf",
|
||||
r#"
|
||||
Bit Scan Forwards -- returns the bit-index of the least significant 1
|
||||
in the word. Is otherwise identical to 'bsr', just above.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![y, rflags]),
|
||||
);
|
||||
|
||||
let uimm8 = &immediates.uimm8;
|
||||
let TxN = &TypeVar::new(
|
||||
"TxN",
|
||||
"A SIMD vector type",
|
||||
TypeSetBuilder::new()
|
||||
.ints(Interval::All)
|
||||
.floats(Interval::All)
|
||||
.bools(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)");
|
||||
let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)");
|
||||
let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details");
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pshufd",
|
||||
r#"
|
||||
Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
|
||||
register and re-orders the data according to the passed immediate byte.
|
||||
"#,
|
||||
&formats.binary_imm8,
|
||||
)
|
||||
.operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pshufb",
|
||||
r#"
|
||||
Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle
|
||||
mask from either memory or another extended register
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN)
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b");
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pblendw",
|
||||
r#"
|
||||
Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a
|
||||
lane in ``b``: if the bit is set, the lane is copied into ``a``.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![a, b, mask])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index");
|
||||
let x = &Operand::new("x", TxN);
|
||||
let a = &Operand::new("a", &TxN.lane_of());
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pextr",
|
||||
r#"
|
||||
Extract lane ``Idx`` from ``x``.
|
||||
The lane index, ``Idx``, is an immediate value, not an SSA value. It
|
||||
must indicate a valid lane index for the type of ``x``.
|
||||
"#,
|
||||
&formats.binary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, Idx])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let IBxN = &TypeVar::new(
|
||||
"IBxN",
|
||||
"A SIMD vector type containing only booleans and integers",
|
||||
TypeSetBuilder::new()
|
||||
.ints(Interval::All)
|
||||
.bools(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let x = &Operand::new("x", IBxN);
|
||||
let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value");
|
||||
let a = &Operand::new("a", IBxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pinsr",
|
||||
r#"
|
||||
Insert ``y`` into ``x`` at lane ``Idx``.
|
||||
The lane index, ``Idx``, is an immediate value, not an SSA value. It
|
||||
must indicate a valid lane index for the type of ``x``.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, y, Idx])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let FxN = &TypeVar::new(
|
||||
"FxN",
|
||||
"A SIMD vector type containing floats",
|
||||
TypeSetBuilder::new()
|
||||
.floats(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let x = &Operand::new("x", FxN);
|
||||
let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value");
|
||||
let a = &Operand::new("a", FxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_insertps",
|
||||
r#"
|
||||
Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is
|
||||
extracted from and which it is inserted to. This is similar to x86_pinsr but inserts
|
||||
floats, which are already stored in an XMM register.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, y, Idx])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", TxN);
|
||||
let y = &Operand::new("y", TxN);
|
||||
let a = &Operand::new("a", TxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_punpckh",
|
||||
r#"
|
||||
Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
|
||||
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
|
||||
would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane
|
||||
ordering).
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_punpckl",
|
||||
r#"
|
||||
Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
|
||||
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
|
||||
would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane
|
||||
ordering).
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", FxN);
|
||||
let y = &Operand::new("y", FxN);
|
||||
let a = &Operand::new("a", FxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_movsd",
|
||||
r#"
|
||||
Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x``
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_movlhps",
|
||||
r#"
|
||||
Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x``
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let IxN = &TypeVar::new(
|
||||
"IxN",
|
||||
"A SIMD vector type containing integers",
|
||||
TypeSetBuilder::new()
|
||||
.ints(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let I128 = &TypeVar::new(
|
||||
"I128",
|
||||
"A SIMD vector type containing one large integer (due to Cranelift type constraints, \
|
||||
this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
|
||||
upper lane is concatenated with the lower lane to form the integer)",
|
||||
TypeSetBuilder::new()
|
||||
.ints(64..64)
|
||||
.simd_lanes(2..2)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
|
||||
let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
|
||||
let a = &Operand::new("a", IxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_psll",
|
||||
r#"
|
||||
Shift Packed Data Left Logical -- This implements the behavior of the shared instruction
|
||||
``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL*
|
||||
family of instructions.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_psrl",
|
||||
r#"
|
||||
Shift Packed Data Right Logical -- This implements the behavior of the shared instruction
|
||||
``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL*
|
||||
family of instructions.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_psra",
|
||||
r#"
|
||||
Shift Packed Data Right Arithmetic -- This implements the behavior of the shared
|
||||
instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by
|
||||
the PSRA* family of instructions.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let I64x2 = &TypeVar::new(
|
||||
"I64x2",
|
||||
"A SIMD vector type containing two 64-bit integers",
|
||||
TypeSetBuilder::new()
|
||||
.ints(64..64)
|
||||
.simd_lanes(2..2)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", I64x2);
|
||||
let y = &Operand::new("y", I64x2);
|
||||
let a = &Operand::new("a", I64x2);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmullq",
|
||||
r#"
|
||||
Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
|
||||
lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
|
||||
encodings for CPUs with newer vector features.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmuludq",
|
||||
r#"
|
||||
Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
|
||||
unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
|
||||
overflow as in `x86_pmullq`.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", TxN);
|
||||
let y = &Operand::new("y", TxN);
|
||||
let f = &Operand::new("f", iflags);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_ptest",
|
||||
r#"
|
||||
Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the
|
||||
bitwise AND of the first source operand (first operand) and the second source operand
|
||||
(second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise
|
||||
AND of the second source operand (second operand) and the logical NOT of the destination
|
||||
operand (first operand).
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![f]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", IxN);
|
||||
let y = &Operand::new("y", IxN);
|
||||
let a = &Operand::new("a", IxN);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmaxs",
|
||||
r#"
|
||||
Maximum of Packed Signed Integers -- Compare signed integers in the first and second
|
||||
operand and return the maximum values.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmaxu",
|
||||
r#"
|
||||
Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
|
||||
operand and return the maximum values.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmins",
|
||||
r#"
|
||||
Minimum of Packed Signed Integers -- Compare signed integers in the first and second
|
||||
operand and return the minimum values.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pminu",
|
||||
r#"
|
||||
Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
|
||||
operand and return the minimum values.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let c = &Operand::new("c", uimm8)
|
||||
.with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_palignr",
|
||||
r#"
|
||||
Concatenate destination and source operands, extracting a byte-aligned result shifted to
|
||||
the right by `c`.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, y, c])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let i64_t = &TypeVar::new(
|
||||
"i64_t",
|
||||
"A scalar 64bit integer",
|
||||
TypeSetBuilder::new().ints(64..64).build(),
|
||||
);
|
||||
|
||||
let GV = &Operand::new("GV", &entities.global_value);
|
||||
let addr = &Operand::new("addr", i64_t);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_elf_tls_get_addr",
|
||||
r#"
|
||||
Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should
|
||||
not be used.
|
||||
"#,
|
||||
&formats.unary_global_value,
|
||||
)
|
||||
// This is a bit overly broad to mark as clobbering *all* the registers, because it should
|
||||
// only preserve caller-saved registers. There's no way to indicate this to register
|
||||
// allocation yet, though, so mark as clobbering all registers instead.
|
||||
.clobbers_all_regs(true)
|
||||
.operands_in(vec![GV])
|
||||
.operands_out(vec![addr]),
|
||||
);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_macho_tls_get_addr",
|
||||
r#"
|
||||
Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should
|
||||
not be used.
|
||||
"#,
|
||||
&formats.unary_global_value,
|
||||
)
|
||||
// See above comment for x86_elf_tls_get_addr.
|
||||
.clobbers_all_regs(true)
|
||||
.operands_in(vec![GV])
|
||||
.operands_out(vec![addr]),
|
||||
);
|
||||
|
||||
ig.build()
|
||||
}
|
||||
@@ -1,827 +0,0 @@
|
||||
use crate::cdsl::ast::{constant, var, ExprBuilder, Literal};
|
||||
use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
|
||||
use crate::cdsl::types::{LaneType, ValueType};
|
||||
use crate::cdsl::xform::TransformGroupBuilder;
|
||||
use crate::shared::types::Float::{F32, F64};
|
||||
use crate::shared::types::Int::{I16, I32, I64, I8};
|
||||
use crate::shared::Definitions as SharedDefinitions;
|
||||
|
||||
#[allow(clippy::many_single_char_names)]
|
||||
pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
|
||||
let mut expand = TransformGroupBuilder::new(
|
||||
"x86_expand",
|
||||
r#"
|
||||
Legalize instructions by expansion.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("expand_flags").id);
|
||||
|
||||
let mut narrow = TransformGroupBuilder::new(
|
||||
"x86_narrow",
|
||||
r#"
|
||||
Legalize instructions by narrowing.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("narrow_flags").id);
|
||||
|
||||
let mut narrow_avx = TransformGroupBuilder::new(
|
||||
"x86_narrow_avx",
|
||||
r#"
|
||||
Legalize instructions by narrowing with CPU feature checks.
|
||||
|
||||
This special case converts using x86 AVX instructions where available."#,
|
||||
)
|
||||
.isa("x86");
|
||||
// We cannot chain with the x86_narrow group until this group is built, see bottom of this
|
||||
// function for where this is chained.
|
||||
|
||||
let mut widen = TransformGroupBuilder::new(
|
||||
"x86_widen",
|
||||
r#"
|
||||
Legalize instructions by widening.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("widen").id);
|
||||
|
||||
// List of instructions.
|
||||
let insts = &shared.instructions;
|
||||
let band = insts.by_name("band");
|
||||
let bor = insts.by_name("bor");
|
||||
let clz = insts.by_name("clz");
|
||||
let ctz = insts.by_name("ctz");
|
||||
let fcmp = insts.by_name("fcmp");
|
||||
let fcvt_from_uint = insts.by_name("fcvt_from_uint");
|
||||
let fcvt_to_sint = insts.by_name("fcvt_to_sint");
|
||||
let fcvt_to_uint = insts.by_name("fcvt_to_uint");
|
||||
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
|
||||
let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
|
||||
let fmax = insts.by_name("fmax");
|
||||
let fmin = insts.by_name("fmin");
|
||||
let iadd = insts.by_name("iadd");
|
||||
let iconst = insts.by_name("iconst");
|
||||
let imul = insts.by_name("imul");
|
||||
let ineg = insts.by_name("ineg");
|
||||
let isub = insts.by_name("isub");
|
||||
let ishl = insts.by_name("ishl");
|
||||
let ireduce = insts.by_name("ireduce");
|
||||
let popcnt = insts.by_name("popcnt");
|
||||
let sdiv = insts.by_name("sdiv");
|
||||
let selectif = insts.by_name("selectif");
|
||||
let smulhi = insts.by_name("smulhi");
|
||||
let srem = insts.by_name("srem");
|
||||
let tls_value = insts.by_name("tls_value");
|
||||
let udiv = insts.by_name("udiv");
|
||||
let umulhi = insts.by_name("umulhi");
|
||||
let ushr = insts.by_name("ushr");
|
||||
let ushr_imm = insts.by_name("ushr_imm");
|
||||
let urem = insts.by_name("urem");
|
||||
|
||||
let x86_bsf = x86_instructions.by_name("x86_bsf");
|
||||
let x86_bsr = x86_instructions.by_name("x86_bsr");
|
||||
let x86_umulx = x86_instructions.by_name("x86_umulx");
|
||||
let x86_smulx = x86_instructions.by_name("x86_smulx");
|
||||
|
||||
let imm = &shared.imm;
|
||||
|
||||
// Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce
|
||||
// the size of the shift amount. This is useful for x86_32, where an I64 shift amount is
|
||||
// not encodable.
|
||||
let a = var("a");
|
||||
let x = var("x");
|
||||
let y = var("y");
|
||||
let z = var("z");
|
||||
|
||||
for &ty in &[I8, I16, I32] {
|
||||
let ishl_by_i64 = ishl.bind(ty).bind(I64);
|
||||
let ireduce = ireduce.bind(I32);
|
||||
expand.legalize(
|
||||
def!(a = ishl_by_i64(x, y)),
|
||||
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
|
||||
);
|
||||
}
|
||||
|
||||
for &ty in &[I8, I16, I32] {
|
||||
let ushr_by_i64 = ushr.bind(ty).bind(I64);
|
||||
let ireduce = ireduce.bind(I32);
|
||||
expand.legalize(
|
||||
def!(a = ushr_by_i64(x, y)),
|
||||
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
|
||||
);
|
||||
}
|
||||
|
||||
// Division and remainder.
|
||||
//
|
||||
// The srem expansion requires custom code because srem INT_MIN, -1 is not
|
||||
// allowed to trap. The other ops need to check avoid_div_traps.
|
||||
expand.custom_legalize(sdiv, "expand_sdivrem");
|
||||
expand.custom_legalize(srem, "expand_sdivrem");
|
||||
expand.custom_legalize(udiv, "expand_udivrem");
|
||||
expand.custom_legalize(urem, "expand_udivrem");
|
||||
|
||||
// Double length (widening) multiplication.
|
||||
let a = var("a");
|
||||
let x = var("x");
|
||||
let y = var("y");
|
||||
let a1 = var("a1");
|
||||
let a2 = var("a2");
|
||||
let res_lo = var("res_lo");
|
||||
let res_hi = var("res_hi");
|
||||
|
||||
expand.legalize(
|
||||
def!(res_hi = umulhi(x, y)),
|
||||
vec![def!((res_lo, res_hi) = x86_umulx(x, y))],
|
||||
);
|
||||
|
||||
expand.legalize(
|
||||
def!(res_hi = smulhi(x, y)),
|
||||
vec![def!((res_lo, res_hi) = x86_smulx(x, y))],
|
||||
);
|
||||
|
||||
// Floating point condition codes.
|
||||
//
|
||||
// The 8 condition codes in `supported_floatccs` are directly supported by a
|
||||
// `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
|
||||
// patterns.
|
||||
|
||||
let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq");
|
||||
let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord");
|
||||
let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq");
|
||||
let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne");
|
||||
let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno");
|
||||
let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one");
|
||||
|
||||
// Equality needs an explicit `ord` test which checks the parity bit.
|
||||
expand.legalize(
|
||||
def!(a = fcmp(floatcc_eq, x, y)),
|
||||
vec![
|
||||
def!(a1 = fcmp(floatcc_ord, x, y)),
|
||||
def!(a2 = fcmp(floatcc_ueq, x, y)),
|
||||
def!(a = band(a1, a2)),
|
||||
],
|
||||
);
|
||||
expand.legalize(
|
||||
def!(a = fcmp(floatcc_ne, x, y)),
|
||||
vec![
|
||||
def!(a1 = fcmp(floatcc_uno, x, y)),
|
||||
def!(a2 = fcmp(floatcc_one, x, y)),
|
||||
def!(a = bor(a1, a2)),
|
||||
],
|
||||
);
|
||||
|
||||
let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt");
|
||||
let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt");
|
||||
let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le");
|
||||
let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge");
|
||||
let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt");
|
||||
let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult");
|
||||
let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge");
|
||||
let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule");
|
||||
|
||||
// Inequalities that need to be reversed.
|
||||
for &(cc, rev_cc) in &[
|
||||
(floatcc_lt, floatcc_gt),
|
||||
(floatcc_le, floatcc_ge),
|
||||
(floatcc_ugt, floatcc_ult),
|
||||
(floatcc_uge, floatcc_ule),
|
||||
] {
|
||||
expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
|
||||
}
|
||||
|
||||
// We need to modify the CFG for min/max legalization.
|
||||
expand.custom_legalize(fmin, "expand_minmax");
|
||||
expand.custom_legalize(fmax, "expand_minmax");
|
||||
|
||||
// Conversions from unsigned need special handling.
|
||||
expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
|
||||
// Conversions from float to int can trap and modify the control flow graph.
|
||||
expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
|
||||
expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
|
||||
expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
|
||||
expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
|
||||
|
||||
// Count leading and trailing zeroes, for baseline x86_64
|
||||
let c_minus_one = var("c_minus_one");
|
||||
let c_thirty_one = var("c_thirty_one");
|
||||
let c_thirty_two = var("c_thirty_two");
|
||||
let c_sixty_three = var("c_sixty_three");
|
||||
let c_sixty_four = var("c_sixty_four");
|
||||
let index1 = var("index1");
|
||||
let r2flags = var("r2flags");
|
||||
let index2 = var("index2");
|
||||
|
||||
let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
|
||||
let imm64_minus_one = Literal::constant(&imm.imm64, -1);
|
||||
let imm64_63 = Literal::constant(&imm.imm64, 63);
|
||||
expand.legalize(
|
||||
def!(a = clz.I64(x)),
|
||||
vec![
|
||||
def!(c_minus_one = iconst(imm64_minus_one)),
|
||||
def!(c_sixty_three = iconst(imm64_63)),
|
||||
def!((index1, r2flags) = x86_bsr(x)),
|
||||
def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
|
||||
def!(a = isub(c_sixty_three, index2)),
|
||||
],
|
||||
);
|
||||
|
||||
let imm64_31 = Literal::constant(&imm.imm64, 31);
|
||||
expand.legalize(
|
||||
def!(a = clz.I32(x)),
|
||||
vec![
|
||||
def!(c_minus_one = iconst(imm64_minus_one)),
|
||||
def!(c_thirty_one = iconst(imm64_31)),
|
||||
def!((index1, r2flags) = x86_bsr(x)),
|
||||
def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
|
||||
def!(a = isub(c_thirty_one, index2)),
|
||||
],
|
||||
);
|
||||
|
||||
let imm64_64 = Literal::constant(&imm.imm64, 64);
|
||||
expand.legalize(
|
||||
def!(a = ctz.I64(x)),
|
||||
vec![
|
||||
def!(c_sixty_four = iconst(imm64_64)),
|
||||
def!((index1, r2flags) = x86_bsf(x)),
|
||||
def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)),
|
||||
],
|
||||
);
|
||||
|
||||
let imm64_32 = Literal::constant(&imm.imm64, 32);
|
||||
expand.legalize(
|
||||
def!(a = ctz.I32(x)),
|
||||
vec![
|
||||
def!(c_thirty_two = iconst(imm64_32)),
|
||||
def!((index1, r2flags) = x86_bsf(x)),
|
||||
def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)),
|
||||
],
|
||||
);
|
||||
|
||||
// Population count for baseline x86_64
|
||||
let x = var("x");
|
||||
let r = var("r");
|
||||
|
||||
let qv3 = var("qv3");
|
||||
let qv4 = var("qv4");
|
||||
let qv5 = var("qv5");
|
||||
let qv6 = var("qv6");
|
||||
let qv7 = var("qv7");
|
||||
let qv8 = var("qv8");
|
||||
let qv9 = var("qv9");
|
||||
let qv10 = var("qv10");
|
||||
let qv11 = var("qv11");
|
||||
let qv12 = var("qv12");
|
||||
let qv13 = var("qv13");
|
||||
let qv14 = var("qv14");
|
||||
let qv15 = var("qv15");
|
||||
let qc77 = var("qc77");
|
||||
#[allow(non_snake_case)]
|
||||
let qc0F = var("qc0F");
|
||||
let qc01 = var("qc01");
|
||||
|
||||
let imm64_1 = Literal::constant(&imm.imm64, 1);
|
||||
let imm64_4 = Literal::constant(&imm.imm64, 4);
|
||||
expand.legalize(
|
||||
def!(r = popcnt.I64(x)),
|
||||
vec![
|
||||
def!(qv3 = ushr_imm(x, imm64_1)),
|
||||
def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))),
|
||||
def!(qv4 = band(qv3, qc77)),
|
||||
def!(qv5 = isub(x, qv4)),
|
||||
def!(qv6 = ushr_imm(qv4, imm64_1)),
|
||||
def!(qv7 = band(qv6, qc77)),
|
||||
def!(qv8 = isub(qv5, qv7)),
|
||||
def!(qv9 = ushr_imm(qv7, imm64_1)),
|
||||
def!(qv10 = band(qv9, qc77)),
|
||||
def!(qv11 = isub(qv8, qv10)),
|
||||
def!(qv12 = ushr_imm(qv11, imm64_4)),
|
||||
def!(qv13 = iadd(qv11, qv12)),
|
||||
def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))),
|
||||
def!(qv14 = band(qv13, qc0F)),
|
||||
def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))),
|
||||
def!(qv15 = imul(qv14, qc01)),
|
||||
def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))),
|
||||
],
|
||||
);
|
||||
|
||||
let lv3 = var("lv3");
|
||||
let lv4 = var("lv4");
|
||||
let lv5 = var("lv5");
|
||||
let lv6 = var("lv6");
|
||||
let lv7 = var("lv7");
|
||||
let lv8 = var("lv8");
|
||||
let lv9 = var("lv9");
|
||||
let lv10 = var("lv10");
|
||||
let lv11 = var("lv11");
|
||||
let lv12 = var("lv12");
|
||||
let lv13 = var("lv13");
|
||||
let lv14 = var("lv14");
|
||||
let lv15 = var("lv15");
|
||||
let lc77 = var("lc77");
|
||||
#[allow(non_snake_case)]
|
||||
let lc0F = var("lc0F");
|
||||
let lc01 = var("lc01");
|
||||
|
||||
expand.legalize(
|
||||
def!(r = popcnt.I32(x)),
|
||||
vec![
|
||||
def!(lv3 = ushr_imm(x, imm64_1)),
|
||||
def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))),
|
||||
def!(lv4 = band(lv3, lc77)),
|
||||
def!(lv5 = isub(x, lv4)),
|
||||
def!(lv6 = ushr_imm(lv4, imm64_1)),
|
||||
def!(lv7 = band(lv6, lc77)),
|
||||
def!(lv8 = isub(lv5, lv7)),
|
||||
def!(lv9 = ushr_imm(lv7, imm64_1)),
|
||||
def!(lv10 = band(lv9, lc77)),
|
||||
def!(lv11 = isub(lv8, lv10)),
|
||||
def!(lv12 = ushr_imm(lv11, imm64_4)),
|
||||
def!(lv13 = iadd(lv11, lv12)),
|
||||
def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))),
|
||||
def!(lv14 = band(lv13, lc0F)),
|
||||
def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))),
|
||||
def!(lv15 = imul(lv14, lc01)),
|
||||
def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))),
|
||||
],
|
||||
);
|
||||
|
||||
expand.custom_legalize(ineg, "convert_ineg");
|
||||
expand.custom_legalize(tls_value, "expand_tls_value");
|
||||
widen.custom_legalize(ineg, "convert_ineg");
|
||||
|
||||
// To reduce compilation times, separate out large blocks of legalizations by theme.
|
||||
define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx);
|
||||
|
||||
expand.build_and_add_to(&mut shared.transform_groups);
|
||||
let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups);
|
||||
narrow_avx
|
||||
.chain_with(narrow_id)
|
||||
.build_and_add_to(&mut shared.transform_groups);
|
||||
widen.build_and_add_to(&mut shared.transform_groups);
|
||||
}
|
||||
|
||||
fn define_simd(
|
||||
shared: &mut SharedDefinitions,
|
||||
x86_instructions: &InstructionGroup,
|
||||
narrow: &mut TransformGroupBuilder,
|
||||
narrow_avx: &mut TransformGroupBuilder,
|
||||
) {
|
||||
let insts = &shared.instructions;
|
||||
let band = insts.by_name("band");
|
||||
let band_not = insts.by_name("band_not");
|
||||
let bitcast = insts.by_name("bitcast");
|
||||
let bitselect = insts.by_name("bitselect");
|
||||
let bor = insts.by_name("bor");
|
||||
let bnot = insts.by_name("bnot");
|
||||
let bxor = insts.by_name("bxor");
|
||||
let extractlane = insts.by_name("extractlane");
|
||||
let fabs = insts.by_name("fabs");
|
||||
let fcmp = insts.by_name("fcmp");
|
||||
let fcvt_from_uint = insts.by_name("fcvt_from_uint");
|
||||
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
|
||||
let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
|
||||
let fmax = insts.by_name("fmax");
|
||||
let fmin = insts.by_name("fmin");
|
||||
let fneg = insts.by_name("fneg");
|
||||
let iadd_imm = insts.by_name("iadd_imm");
|
||||
let icmp = insts.by_name("icmp");
|
||||
let imax = insts.by_name("imax");
|
||||
let imin = insts.by_name("imin");
|
||||
let imul = insts.by_name("imul");
|
||||
let ineg = insts.by_name("ineg");
|
||||
let insertlane = insts.by_name("insertlane");
|
||||
let ishl = insts.by_name("ishl");
|
||||
let ishl_imm = insts.by_name("ishl_imm");
|
||||
let raw_bitcast = insts.by_name("raw_bitcast");
|
||||
let scalar_to_vector = insts.by_name("scalar_to_vector");
|
||||
let splat = insts.by_name("splat");
|
||||
let shuffle = insts.by_name("shuffle");
|
||||
let sshr = insts.by_name("sshr");
|
||||
let swizzle = insts.by_name("swizzle");
|
||||
let trueif = insts.by_name("trueif");
|
||||
let uadd_sat = insts.by_name("uadd_sat");
|
||||
let umax = insts.by_name("umax");
|
||||
let umin = insts.by_name("umin");
|
||||
let snarrow = insts.by_name("snarrow");
|
||||
let swiden_high = insts.by_name("swiden_high");
|
||||
let swiden_low = insts.by_name("swiden_low");
|
||||
let ushr_imm = insts.by_name("ushr_imm");
|
||||
let ushr = insts.by_name("ushr");
|
||||
let uwiden_high = insts.by_name("uwiden_high");
|
||||
let uwiden_low = insts.by_name("uwiden_low");
|
||||
let vconst = insts.by_name("vconst");
|
||||
let vall_true = insts.by_name("vall_true");
|
||||
let vany_true = insts.by_name("vany_true");
|
||||
let vselect = insts.by_name("vselect");
|
||||
|
||||
let x86_palignr = x86_instructions.by_name("x86_palignr");
|
||||
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
|
||||
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
|
||||
let x86_pmins = x86_instructions.by_name("x86_pmins");
|
||||
let x86_pminu = x86_instructions.by_name("x86_pminu");
|
||||
let x86_pshufb = x86_instructions.by_name("x86_pshufb");
|
||||
let x86_pshufd = x86_instructions.by_name("x86_pshufd");
|
||||
let x86_psra = x86_instructions.by_name("x86_psra");
|
||||
let x86_ptest = x86_instructions.by_name("x86_ptest");
|
||||
let x86_punpckh = x86_instructions.by_name("x86_punpckh");
|
||||
let x86_punpckl = x86_instructions.by_name("x86_punpckl");
|
||||
|
||||
let imm = &shared.imm;
|
||||
|
||||
// Set up variables and immediates.
|
||||
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
|
||||
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
|
||||
let uimm8_eight = Literal::constant(&imm.uimm8, 8);
|
||||
let u128_zeroes = constant(vec![0x00; 16]);
|
||||
let u128_ones = constant(vec![0xff; 16]);
|
||||
let u128_seventies = constant(vec![0x70; 16]);
|
||||
let a = var("a");
|
||||
let b = var("b");
|
||||
let c = var("c");
|
||||
let d = var("d");
|
||||
let e = var("e");
|
||||
let f = var("f");
|
||||
let g = var("g");
|
||||
let h = var("h");
|
||||
let x = var("x");
|
||||
let y = var("y");
|
||||
let z = var("z");
|
||||
|
||||
// Limit the SIMD vector size: eventually multiple vector sizes may be supported
|
||||
// but for now only SSE-sized vectors are available.
|
||||
let sse_vector_size: u64 = 128;
|
||||
let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
|
||||
|
||||
// SIMD splat: 8-bits
|
||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
|
||||
let splat_any8x16 = splat.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = splat_any8x16(x)),
|
||||
vec![
|
||||
// Move into the lowest 8 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Zero out a different XMM register; the shuffle mask for moving the lowest byte
|
||||
// to all other byte lanes is 0x0.
|
||||
def!(b = vconst(u128_zeroes)),
|
||||
// PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b).
|
||||
def!(y = x86_pshufb(a, b)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD splat: 16-bits
|
||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
|
||||
let splat_x16x8 = splat.bind(vector(ty, sse_vector_size));
|
||||
let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
|
||||
.bind(vector(I32, sse_vector_size))
|
||||
.bind(vector(ty, sse_vector_size));
|
||||
let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
|
||||
.bind(vector(ty, sse_vector_size))
|
||||
.bind(vector(I32, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = splat_x16x8(x)),
|
||||
vec![
|
||||
// Move into the lowest 16 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Insert the value again but in the next lowest 16 bits.
|
||||
def!(b = insertlane(a, x, uimm8_one)),
|
||||
// No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
|
||||
def!(c = raw_bitcast_any16x8_to_i32x4(b)),
|
||||
// Broadcast the bytes in the XMM register with PSHUFD.
|
||||
def!(d = x86_pshufd(c, uimm8_zero)),
|
||||
// No instruction emitted; pretend this is an X16x8 again.
|
||||
def!(y = raw_bitcast_i32x4_to_any16x8(d)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD splat: 32-bits
|
||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
|
||||
let splat_any32x4 = splat.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = splat_any32x4(x)),
|
||||
vec![
|
||||
// Translate to an x86 MOV to get the value in an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Broadcast the bytes in the XMM register with PSHUFD.
|
||||
def!(y = x86_pshufd(a, uimm8_zero)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD splat: 64-bits
|
||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
|
||||
let splat_any64x2 = splat.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = splat_any64x2(x)),
|
||||
vec![
|
||||
// Move into the lowest 64 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Move into the highest 64 bits of the same XMM register.
|
||||
def!(y = insertlane(a, x, uimm8_one)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring
|
||||
// mask indexes greater than 15 to have the same semantics as a 0 index. For the spec discussion,
|
||||
// see https://github.com/WebAssembly/simd/issues/93.
|
||||
{
|
||||
let swizzle = swizzle.bind(vector(I8, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(a = swizzle(x, y)),
|
||||
vec![
|
||||
def!(b = vconst(u128_seventies)),
|
||||
def!(c = uadd_sat(y, b)),
|
||||
def!(a = x86_pshufb(x, c)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD bnot
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let bnot = bnot.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = bnot(x)),
|
||||
vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD shift right (arithmetic, i16x8 and i32x4)
|
||||
for ty in &[I16, I32] {
|
||||
let sshr = sshr.bind(vector(*ty, sse_vector_size));
|
||||
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(a = sshr(x, y)),
|
||||
vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
|
||||
);
|
||||
}
|
||||
// SIMD shift right (arithmetic, i8x16)
|
||||
{
|
||||
let sshr = sshr.bind(vector(I8, sse_vector_size));
|
||||
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
|
||||
let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size));
|
||||
let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(z = sshr(x, y)),
|
||||
vec![
|
||||
// Since we will use the high byte of each 16x8 lane, shift an extra 8 bits.
|
||||
def!(a = iadd_imm(y, uimm8_eight)),
|
||||
def!(b = bitcast_i64x2(a)),
|
||||
// Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
|
||||
def!(c = x86_punpckl(x, x)),
|
||||
def!(d = raw_bitcast_i16x8(c)),
|
||||
def!(e = x86_psra(d, b)),
|
||||
// Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
|
||||
def!(f = x86_punpckh(x, x)),
|
||||
def!(g = raw_bitcast_i16x8_again(f)),
|
||||
def!(h = x86_psra(g, b)),
|
||||
// Re-pack the vector.
|
||||
def!(z = snarrow(e, h)),
|
||||
],
|
||||
);
|
||||
}
|
||||
// SIMD shift right (arithmetic, i64x2)
|
||||
{
|
||||
let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
|
||||
let sshr_scalar_lane0 = sshr.bind(I64);
|
||||
let sshr_scalar_lane1 = sshr.bind(I64);
|
||||
narrow.legalize(
|
||||
def!(z = sshr_vector(x, y)),
|
||||
vec![
|
||||
// Use scalar operations to shift the first lane.
|
||||
def!(a = extractlane(x, uimm8_zero)),
|
||||
def!(b = sshr_scalar_lane0(a, y)),
|
||||
def!(c = insertlane(x, b, uimm8_zero)),
|
||||
// Do the same for the second lane.
|
||||
def!(d = extractlane(x, uimm8_one)),
|
||||
def!(e = sshr_scalar_lane1(d, y)),
|
||||
def!(z = insertlane(c, e, uimm8_one)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD select
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c
|
||||
narrow.legalize(
|
||||
def!(d = bitselect(c, x, y)),
|
||||
vec![
|
||||
def!(a = band(x, c)),
|
||||
def!(b = band_not(y, c)),
|
||||
def!(d = bor(a, b)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD vselect; replace with bitselect if BLEND* instructions are not available.
|
||||
// This works, because each lane of boolean vector is filled with zeroes or ones.
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let vselect = vselect.bind(vector(ty, sse_vector_size));
|
||||
let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(d = vselect(c, x, y)),
|
||||
vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD vany_true
|
||||
let ne = Literal::enumerator_for(&imm.intcc, "ne");
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let vany_true = vany_true.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = vany_true(x)),
|
||||
vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD vall_true
|
||||
let eq = Literal::enumerator_for(&imm.intcc, "eq");
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let vall_true = vall_true.bind(vector(ty, sse_vector_size));
|
||||
if ty.is_int() {
|
||||
// In the common case (Wasm's integer-only all_true), we do not require a
|
||||
// bitcast.
|
||||
narrow.legalize(
|
||||
def!(y = vall_true(x)),
|
||||
vec![
|
||||
def!(a = vconst(u128_zeroes)),
|
||||
def!(c = icmp(eq, x, a)),
|
||||
def!(d = x86_ptest(c, c)),
|
||||
def!(y = trueif(eq, d)),
|
||||
],
|
||||
);
|
||||
} else {
|
||||
// However, to support other types we must bitcast them to an integer vector to
|
||||
// use icmp.
|
||||
let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16);
|
||||
let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = vall_true(x)),
|
||||
vec![
|
||||
def!(a = vconst(u128_zeroes)),
|
||||
def!(b = raw_bitcast_to_int(x)),
|
||||
def!(c = icmp(eq, b, a)),
|
||||
def!(d = x86_ptest(c, c)),
|
||||
def!(y = trueif(eq, d)),
|
||||
],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// SIMD icmp ne
|
||||
let ne = Literal::enumerator_for(&imm.intcc, "ne");
|
||||
for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) {
|
||||
let icmp_ = icmp.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(c = icmp_(ne, a, b)),
|
||||
vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD icmp greater-/less-than
|
||||
let sgt = Literal::enumerator_for(&imm.intcc, "sgt");
|
||||
let ugt = Literal::enumerator_for(&imm.intcc, "ugt");
|
||||
let sge = Literal::enumerator_for(&imm.intcc, "sge");
|
||||
let uge = Literal::enumerator_for(&imm.intcc, "uge");
|
||||
let slt = Literal::enumerator_for(&imm.intcc, "slt");
|
||||
let ult = Literal::enumerator_for(&imm.intcc, "ult");
|
||||
let sle = Literal::enumerator_for(&imm.intcc, "sle");
|
||||
let ule = Literal::enumerator_for(&imm.intcc, "ule");
|
||||
for ty in &[I8, I16, I32] {
|
||||
// greater-than
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(c = icmp_(ugt, a, b)),
|
||||
vec![
|
||||
def!(x = x86_pmaxu(a, b)),
|
||||
def!(y = icmp(eq, x, b)),
|
||||
def!(c = bnot(y)),
|
||||
],
|
||||
);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(c = icmp_(sge, a, b)),
|
||||
vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))],
|
||||
);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(c = icmp_(uge, a, b)),
|
||||
vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))],
|
||||
);
|
||||
|
||||
// less-than
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]);
|
||||
}
|
||||
|
||||
// SIMD integer min/max
|
||||
for ty in &[I8, I16, I32] {
|
||||
let imin = imin.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]);
|
||||
let umin = umin.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]);
|
||||
let imax = imax.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]);
|
||||
let umax = umax.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]);
|
||||
}
|
||||
|
||||
// SIMD fcmp greater-/less-than
|
||||
let gt = Literal::enumerator_for(&imm.floatcc, "gt");
|
||||
let lt = Literal::enumerator_for(&imm.floatcc, "lt");
|
||||
let ge = Literal::enumerator_for(&imm.floatcc, "ge");
|
||||
let le = Literal::enumerator_for(&imm.floatcc, "le");
|
||||
let ugt = Literal::enumerator_for(&imm.floatcc, "ugt");
|
||||
let ult = Literal::enumerator_for(&imm.floatcc, "ult");
|
||||
let uge = Literal::enumerator_for(&imm.floatcc, "uge");
|
||||
let ule = Literal::enumerator_for(&imm.floatcc, "ule");
|
||||
for ty in &[F32, F64] {
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]);
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]);
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]);
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]);
|
||||
}
|
||||
|
||||
for ty in &[F32, F64] {
|
||||
let fneg = fneg.bind(vector(*ty, sse_vector_size));
|
||||
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
|
||||
let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1);
|
||||
let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
|
||||
let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = fneg(a)),
|
||||
vec![
|
||||
def!(c = vconst(u128_ones)),
|
||||
def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB.
|
||||
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
|
||||
def!(b = bxor(a, e)), // Flip the MSB.
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD fabs
|
||||
for ty in &[F32, F64] {
|
||||
let fabs = fabs.bind(vector(*ty, sse_vector_size));
|
||||
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
|
||||
let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
|
||||
let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = fabs(a)),
|
||||
vec![
|
||||
def!(c = vconst(u128_ones)),
|
||||
def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
|
||||
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
|
||||
def!(b = band(a, e)), // Unset the MSB.
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD widen
|
||||
for ty in &[I8, I16] {
|
||||
let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = swiden_high(a)),
|
||||
vec![
|
||||
def!(c = x86_palignr(a, a, uimm8_eight)),
|
||||
def!(b = swiden_low(c)),
|
||||
],
|
||||
);
|
||||
let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = uwiden_high(a)),
|
||||
vec![
|
||||
def!(c = x86_palignr(a, a, uimm8_eight)),
|
||||
def!(b = uwiden_low(c)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
narrow.custom_legalize(shuffle, "convert_shuffle");
|
||||
narrow.custom_legalize(extractlane, "convert_extractlane");
|
||||
narrow.custom_legalize(insertlane, "convert_insertlane");
|
||||
narrow.custom_legalize(ineg, "convert_ineg");
|
||||
narrow.custom_legalize(ushr, "convert_ushr");
|
||||
narrow.custom_legalize(ishl, "convert_ishl");
|
||||
narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
|
||||
narrow.custom_legalize(fmin, "expand_minmax_vector");
|
||||
narrow.custom_legalize(fmax, "expand_minmax_vector");
|
||||
|
||||
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
|
||||
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
|
||||
narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector");
|
||||
}
|
||||
@@ -1,87 +1,25 @@
|
||||
use crate::cdsl::cpu_modes::CpuMode;
|
||||
use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
|
||||
use crate::cdsl::isa::TargetIsa;
|
||||
use crate::cdsl::types::{ReferenceType, VectorType};
|
||||
use crate::cdsl::recipes::Recipes;
|
||||
use crate::cdsl::regs::IsaRegsBuilder;
|
||||
|
||||
use crate::shared::types::Bool::B1;
|
||||
use crate::shared::types::Float::{F32, F64};
|
||||
use crate::shared::types::Int::{I16, I32, I64, I8};
|
||||
use crate::shared::types::Reference::{R32, R64};
|
||||
use crate::shared::Definitions as SharedDefinitions;
|
||||
|
||||
mod encodings;
|
||||
mod instructions;
|
||||
mod legalize;
|
||||
mod opcodes;
|
||||
mod recipes;
|
||||
mod registers;
|
||||
pub(crate) mod settings;
|
||||
|
||||
pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
|
||||
let settings = settings::define(&shared_defs.settings);
|
||||
let regs = registers::define();
|
||||
|
||||
let inst_group = instructions::define(
|
||||
&mut shared_defs.all_instructions,
|
||||
&shared_defs.formats,
|
||||
&shared_defs.imm,
|
||||
&shared_defs.entities,
|
||||
);
|
||||
legalize::define(shared_defs, &inst_group);
|
||||
let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
|
||||
|
||||
// CPU modes for 32-bit and 64-bit operations.
|
||||
let mut x86_64 = CpuMode::new("I64");
|
||||
let mut x86_32 = CpuMode::new("I32");
|
||||
|
||||
let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
|
||||
let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
|
||||
let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
|
||||
let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx");
|
||||
let x86_expand = shared_defs.transform_groups.by_name("x86_expand");
|
||||
|
||||
x86_32.legalize_monomorphic(expand_flags);
|
||||
x86_32.legalize_default(x86_narrow);
|
||||
x86_32.legalize_type(B1, expand_flags);
|
||||
x86_32.legalize_type(I8, x86_widen);
|
||||
x86_32.legalize_type(I16, x86_widen);
|
||||
x86_32.legalize_type(I32, x86_expand);
|
||||
x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
|
||||
x86_32.legalize_type(F32, x86_expand);
|
||||
x86_32.legalize_type(F64, x86_expand);
|
||||
x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
|
||||
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
|
||||
x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
|
||||
|
||||
x86_64.legalize_monomorphic(expand_flags);
|
||||
x86_64.legalize_default(x86_narrow);
|
||||
x86_64.legalize_type(B1, expand_flags);
|
||||
x86_64.legalize_type(I8, x86_widen);
|
||||
x86_64.legalize_type(I16, x86_widen);
|
||||
x86_64.legalize_type(I32, x86_expand);
|
||||
x86_64.legalize_type(I64, x86_expand);
|
||||
x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
|
||||
x86_64.legalize_type(F32, x86_expand);
|
||||
x86_64.legalize_type(F64, x86_expand);
|
||||
x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
|
||||
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
|
||||
x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
|
||||
|
||||
let recipes = recipes::define(shared_defs, &settings, ®s);
|
||||
|
||||
let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes);
|
||||
x86_32.set_encodings(encodings.enc32);
|
||||
x86_64.set_encodings(encodings.enc64);
|
||||
let encodings_predicates = encodings.inst_pred_reg.extract();
|
||||
|
||||
let recipes = encodings.recipes;
|
||||
|
||||
let cpu_modes = vec![x86_64, x86_32];
|
||||
let cpu_modes = vec![];
|
||||
|
||||
TargetIsa::new(
|
||||
"x86",
|
||||
settings,
|
||||
regs,
|
||||
recipes,
|
||||
IsaRegsBuilder::new().build(),
|
||||
Recipes::new(),
|
||||
cpu_modes,
|
||||
encodings_predicates,
|
||||
InstructionPredicateMap::new(),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,721 +0,0 @@
|
||||
//! Static, named definitions of instruction opcodes.
|
||||
|
||||
/// Empty opcode for use as a default.
|
||||
pub static EMPTY: [u8; 0] = [];
|
||||
|
||||
/// Add with carry flag r{16,32,64} to r/m of the same size.
|
||||
pub static ADC: [u8; 1] = [0x11];
|
||||
|
||||
/// Add r{16,32,64} to r/m of the same size.
|
||||
pub static ADD: [u8; 1] = [0x01];
|
||||
|
||||
/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended.
|
||||
pub static ADD_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// Add sign-extended imm8 to r/m{16,32,64}.
|
||||
pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
|
||||
|
||||
/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in
|
||||
/// xmm1 (SSE2).
|
||||
pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58];
|
||||
|
||||
/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in
|
||||
/// xmm1 (SSE).
|
||||
pub static ADDPS: [u8; 2] = [0x0f, 0x58];
|
||||
|
||||
/// Add the low double-precision floating-point value from xmm2/mem to xmm1
|
||||
/// and store the result in xmm1.
|
||||
pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58];
|
||||
|
||||
/// Add the low single-precision floating-point value from xmm2/mem to xmm1
|
||||
/// and store the result in xmm1.
|
||||
pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58];
|
||||
|
||||
/// r/m{16,32,64} AND register of the same size (Intel docs have a typo).
|
||||
pub static AND: [u8; 1] = [0x21];
|
||||
|
||||
/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended.
|
||||
pub static AND_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// r/m{16,32,64} AND sign-extended imm8.
|
||||
pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
|
||||
|
||||
/// Return the bitwise logical AND NOT of packed single-precision floating-point
|
||||
/// values in xmm1 and xmm2/mem.
|
||||
pub static ANDNPS: [u8; 2] = [0x0f, 0x55];
|
||||
|
||||
/// Return the bitwise logical AND of packed single-precision floating-point values
|
||||
/// in xmm1 and xmm2/mem.
|
||||
pub static ANDPS: [u8; 2] = [0x0f, 0x54];
|
||||
|
||||
/// Bit scan forward (stores index of first encountered 1 from the front).
|
||||
pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
|
||||
|
||||
/// Bit scan reverse (stores index of first encountered 1 from the back).
|
||||
pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];
|
||||
|
||||
/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
|
||||
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
|
||||
pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];
|
||||
|
||||
/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
|
||||
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
|
||||
pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];
|
||||
|
||||
/// Call near, relative, displacement relative to next instruction (sign-extended).
|
||||
pub static CALL_RELATIVE: [u8; 1] = [0xe8];
|
||||
|
||||
/// Move r/m{16,32,64} if overflow (OF=1).
|
||||
pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40];
|
||||
|
||||
/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64).
|
||||
pub static CMP_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// Compare imm8 with r/m{16,32,64}.
|
||||
pub static CMP_IMM8: [u8; 1] = [0x83];
|
||||
|
||||
/// Compare r{16,32,64} with r/m of the same size.
|
||||
pub static CMP_REG: [u8; 1] = [0x39];
|
||||
|
||||
/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
|
||||
/// imm8 as comparison predicate (SSE2).
|
||||
pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2];
|
||||
|
||||
/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
|
||||
/// imm8 as comparison predicate (SSE).
|
||||
pub static CMPPS: [u8; 2] = [0x0f, 0xc2];
|
||||
|
||||
/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision
|
||||
/// floating-point values in xmm1 (SSE2).
|
||||
pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b];
|
||||
|
||||
/// Convert scalar double-precision floating-point value to scalar single-precision
|
||||
/// floating-point value.
|
||||
pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a];
|
||||
|
||||
/// Convert doubleword integer to scalar double-precision floating-point value.
|
||||
pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a];
|
||||
|
||||
/// Convert doubleword integer to scalar single-precision floating-point value.
|
||||
pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a];
|
||||
|
||||
/// Convert scalar single-precision floating-point value to scalar double-precision
|
||||
/// float-point value.
|
||||
pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a];
|
||||
|
||||
/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed
|
||||
/// doubleword values in xmm1 using truncation (SSE2).
|
||||
pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b];
|
||||
|
||||
/// Convert with truncation scalar double-precision floating-point value to signed
|
||||
/// integer.
|
||||
pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c];
|
||||
|
||||
/// Convert with truncation scalar single-precision floating-point value to integer.
|
||||
pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c];
|
||||
|
||||
/// Unsigned divide for {16,32,64}-bit.
|
||||
pub static DIV: [u8; 1] = [0xf7];
|
||||
|
||||
/// Divide packed double-precision floating-point values in xmm1 by packed double-precision
|
||||
/// floating-point values in xmm2/mem (SSE2).
|
||||
pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e];
|
||||
|
||||
/// Divide packed single-precision floating-point values in xmm1 by packed single-precision
|
||||
/// floating-point values in xmm2/mem (SSE).
|
||||
pub static DIVPS: [u8; 2] = [0x0f, 0x5e];
|
||||
|
||||
/// Divide low double-precision floating-point value in xmm1 by low double-precision
|
||||
/// floating-point value in xmm2/m64.
|
||||
pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e];
|
||||
|
||||
/// Divide low single-precision floating-point value in xmm1 by low single-precision
|
||||
/// floating-point value in xmm2/m32.
|
||||
pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e];
|
||||
|
||||
/// Signed divide for {16,32,64}-bit.
|
||||
pub static IDIV: [u8; 1] = [0xf7];
|
||||
|
||||
/// Signed multiply for {16,32,64}-bit, generic registers.
|
||||
pub static IMUL: [u8; 2] = [0x0f, 0xaf];
|
||||
|
||||
/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX.
|
||||
pub static IMUL_RDX_RAX: [u8; 1] = [0xf7];
|
||||
|
||||
/// Insert scalar single-precision floating-point value.
|
||||
pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21];
|
||||
|
||||
/// Either:
|
||||
/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory.
|
||||
/// 2. Jump far, absolute indirect, address given in m16:64.
|
||||
pub static JUMP_ABSOLUTE: [u8; 1] = [0xff];
|
||||
|
||||
/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits.
|
||||
pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9];
|
||||
|
||||
/// Jump near (rel32) if overflow (OF=1).
|
||||
pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80];
|
||||
|
||||
/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits.
|
||||
pub static JUMP_SHORT: [u8; 1] = [0xeb];
|
||||
|
||||
/// Jump short (rel8) if equal (ZF=1).
|
||||
pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74];
|
||||
|
||||
/// Jump short (rel8) if not equal (ZF=0).
|
||||
pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75];
|
||||
|
||||
/// Jump short (rel8) if overflow (OF=1).
|
||||
pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70];
|
||||
|
||||
/// Store effective address for m in register r{16,32,64}.
|
||||
pub static LEA: [u8; 1] = [0x8d];
|
||||
|
||||
/// Count the number of leading zero bits.
|
||||
pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd];
|
||||
|
||||
/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128
|
||||
/// (SSE2).
|
||||
pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f];
|
||||
|
||||
/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128
|
||||
/// (SSE).
|
||||
pub static MAXPS: [u8; 2] = [0x0f, 0x5f];
|
||||
|
||||
/// Return the maximum scalar double-precision floating-point value between
|
||||
/// xmm2/m64 and xmm1.
|
||||
pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f];
|
||||
|
||||
/// Return the maximum scalar single-precision floating-point value between
|
||||
/// xmm2/m32 and xmm1.
|
||||
pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f];
|
||||
|
||||
/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128
|
||||
/// (SSE2).
|
||||
pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d];
|
||||
|
||||
/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128
|
||||
/// (SSE).
|
||||
pub static MINPS: [u8; 2] = [0x0f, 0x5d];
|
||||
|
||||
/// Return the minimum scalar double-precision floating-point value between
|
||||
/// xmm2/m64 and xmm1.
|
||||
pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d];
|
||||
|
||||
/// Return the minimum scalar single-precision floating-point value between
|
||||
/// xmm2/m32 and xmm1.
|
||||
pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d];
|
||||
|
||||
/// Move r8 to r/m8.
|
||||
pub static MOV_BYTE_STORE: [u8; 1] = [0x88];
|
||||
|
||||
/// Move imm{16,32,64} to same-sized register.
|
||||
pub static MOV_IMM: [u8; 1] = [0xb8];
|
||||
|
||||
/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target.
|
||||
pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7];
|
||||
|
||||
/// Move {r/m16, r/m32, r/m64} to same-sized register.
|
||||
pub static MOV_LOAD: [u8; 1] = [0x8b];
|
||||
|
||||
/// Move r16 to r/m16.
|
||||
pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89];
|
||||
|
||||
/// Move {r16, r32, r64} to same-sized register or memory.
|
||||
pub static MOV_STORE: [u8; 1] = [0x89];
|
||||
|
||||
/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE).
|
||||
pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28];
|
||||
|
||||
/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix.
|
||||
pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e];
|
||||
|
||||
/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix.
|
||||
pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e];
|
||||
|
||||
/// Move packed single-precision floating-point values low to high (SSE).
|
||||
pub static MOVLHPS: [u8; 2] = [0x0f, 0x16];
|
||||
|
||||
/// Move scalar double-precision floating-point value (from reg/mem to reg).
|
||||
pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10];
|
||||
|
||||
/// Move scalar double-precision floating-point value (from reg to reg/mem).
|
||||
pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11];
|
||||
|
||||
/// Move scalar single-precision floating-point value (from reg to reg/mem).
|
||||
pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11];
|
||||
|
||||
/// Move scalar single-precision floating-point-value (from reg/mem to reg).
|
||||
pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10];
|
||||
|
||||
/// Move byte to register with sign-extension.
|
||||
pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe];
|
||||
|
||||
/// Move word to register with sign-extension.
|
||||
pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf];
|
||||
|
||||
/// Move doubleword to register with sign-extension.
|
||||
pub static MOVSXD: [u8; 1] = [0x63];
|
||||
|
||||
/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE).
|
||||
pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10];
|
||||
|
||||
/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE).
|
||||
pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11];
|
||||
|
||||
/// Move byte to register with zero-extension.
|
||||
pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6];
|
||||
|
||||
/// Move word to register with zero-extension.
|
||||
pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7];
|
||||
|
||||
/// Unsigned multiply for {16,32,64}-bit.
|
||||
pub static MUL: [u8; 1] = [0xf7];
|
||||
|
||||
/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result
|
||||
/// in xmm1 (SSE2).
|
||||
pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59];
|
||||
|
||||
/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result
|
||||
/// in xmm1 (SSE).
|
||||
pub static MULPS: [u8; 2] = [0x0f, 0x59];
|
||||
|
||||
/// Multiply the low double-precision floating-point value in xmm2/m64 by the
|
||||
/// low double-precision floating-point value in xmm1.
|
||||
pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59];
|
||||
|
||||
/// Multiply the low single-precision floating-point value in xmm2/m32 by the
|
||||
/// low single-precision floating-point value in xmm1.
|
||||
pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59];
|
||||
|
||||
/// Reverse each bit of r/m{16,32,64}.
|
||||
pub static NOT: [u8; 1] = [0xf7];
|
||||
|
||||
/// r{16,32,64} OR register of same size.
|
||||
pub static OR: [u8; 1] = [0x09];
|
||||
|
||||
/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended.
|
||||
pub static OR_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// r/m{16,32,64} OR sign-extended imm8.
|
||||
pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
|
||||
|
||||
/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE).
|
||||
pub static ORPS: [u8; 2] = [0x0f, 0x56];
|
||||
|
||||
/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3).
|
||||
pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c];
|
||||
|
||||
/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in
|
||||
/// xmm1 (SSSE3).
|
||||
pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
|
||||
|
||||
/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in
|
||||
/// xmm1 (SSSE3).
|
||||
pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
|
||||
|
||||
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
|
||||
/// integers in xmm1 using signed saturation (SSE2).
|
||||
pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
|
||||
|
||||
/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed
|
||||
/// word integers in xmm1 using signed saturation (SSE2).
|
||||
pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b];
|
||||
|
||||
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte
|
||||
/// integers in xmm1 using unsigned saturation (SSE2).
|
||||
pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67];
|
||||
|
||||
/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed
|
||||
/// word integers in xmm1 using unsigned saturation (SSE4.1).
|
||||
pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b];
|
||||
|
||||
/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
|
||||
|
||||
/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe];
|
||||
|
||||
/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4];
|
||||
|
||||
/// Add packed word integers from xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd];
|
||||
|
||||
/// Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results (SSE).
|
||||
pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec];
|
||||
|
||||
/// Add packed signed word integers from xmm2/m128 and xmm1 saturate the results (SSE).
|
||||
pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed];
|
||||
|
||||
/// Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results (SSE).
|
||||
pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
|
||||
|
||||
/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE).
|
||||
pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
|
||||
|
||||
/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
|
||||
/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
|
||||
pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
|
||||
|
||||
/// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
|
||||
|
||||
/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf];
|
||||
|
||||
/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2).
|
||||
pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0];
|
||||
|
||||
/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
|
||||
pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];
|
||||
|
||||
/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
|
||||
/// in XMM0 and store the values into xmm1 (SSE4.1).
|
||||
pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];
|
||||
|
||||
/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1
|
||||
/// (SSE4.1).
|
||||
pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e];
|
||||
|
||||
/// Compare packed data for equal (SSE2).
|
||||
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
|
||||
|
||||
/// Compare packed data for equal (SSE2).
|
||||
pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76];
|
||||
|
||||
/// Compare packed data for equal (SSE4.1).
|
||||
pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
|
||||
|
||||
/// Compare packed data for equal (SSE2).
|
||||
pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
|
||||
|
||||
/// Compare packed signed byte integers for greater than (SSE2).
|
||||
pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64];
|
||||
|
||||
/// Compare packed signed doubleword integers for greater than (SSE2).
|
||||
pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66];
|
||||
|
||||
/// Compare packed signed quadword integers for greater than (SSE4.2).
|
||||
pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37];
|
||||
|
||||
/// Compare packed signed word integers for greater than (SSE2).
|
||||
pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65];
|
||||
|
||||
/// Extract doubleword or quadword, depending on REX.W (SSE4.1).
|
||||
pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
|
||||
|
||||
/// Extract byte (SSE4.1).
|
||||
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
|
||||
|
||||
/// Extract word (SSE4.1). There is a 3-byte SSE2 variant that can also move to m/16.
|
||||
pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];
|
||||
|
||||
/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
|
||||
pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
|
||||
|
||||
/// Insert byte (SSE4.1).
|
||||
pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
|
||||
|
||||
/// Insert word (SSE2).
|
||||
pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
|
||||
|
||||
/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||
/// xmm1 (SSE4.1).
|
||||
pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c];
|
||||
|
||||
/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum
|
||||
/// values in xmm1 (SSE4.1).
|
||||
pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d];
|
||||
|
||||
/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||
/// xmm1 (SSE2).
|
||||
pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee];
|
||||
|
||||
/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||
/// xmm1 (SSE2).
|
||||
pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde];
|
||||
|
||||
/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum
|
||||
/// values in xmm1 (SSE4.1).
|
||||
pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f];
|
||||
|
||||
/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||
/// xmm1 (SSE4.1).
|
||||
pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e];
|
||||
|
||||
/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||
/// xmm1 (SSE4.1).
|
||||
pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38];
|
||||
|
||||
/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum
|
||||
/// values in xmm1 (SSE4.1).
|
||||
pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39];
|
||||
|
||||
/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||
/// xmm1 (SSE2).
|
||||
pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea];
|
||||
|
||||
/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||
/// xmm1 (SSE2).
|
||||
pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda];
|
||||
|
||||
/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum
|
||||
/// values in xmm1 (SSE4.1).
|
||||
pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b];
|
||||
|
||||
/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||
/// xmm1 (SSE4.1).
|
||||
pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a];
|
||||
|
||||
/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
|
||||
|
||||
/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
|
||||
|
||||
/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
|
||||
|
||||
/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
|
||||
|
||||
/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
|
||||
|
||||
/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
|
||||
|
||||
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
|
||||
/// the results in xmm1 (SSE2).
|
||||
pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
|
||||
|
||||
/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32
|
||||
/// bits of each product in xmm1 (SSE4.1).
|
||||
pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
|
||||
|
||||
/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
|
||||
/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
|
||||
pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
|
||||
|
||||
/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
|
||||
/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
|
||||
pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
|
||||
|
||||
/// Multiply the packed word integers, add adjacent doubleword results.
|
||||
pub static PMADDWD: [u8; 3] = [0x66, 0x0f, 0xf5];
|
||||
|
||||
/// Pop top of stack into r{16,32,64}; increment stack pointer.
|
||||
pub static POP_REG: [u8; 1] = [0x58];
|
||||
|
||||
/// Returns the count of number of bits set to 1.
|
||||
pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8];
|
||||
|
||||
/// Bitwise OR of xmm2/m128 and xmm1 (SSE2).
|
||||
pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb];
|
||||
|
||||
/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSE3).
|
||||
pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
|
||||
|
||||
/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and
|
||||
/// store the result in xmm1 (SSE2).
|
||||
pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
|
||||
|
||||
/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
|
||||
/// digit used in the ModR/M byte (SSE2).
|
||||
pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71];
|
||||
|
||||
/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
|
||||
/// digit used in the ModR/M byte (SSE2).
|
||||
pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72];
|
||||
|
||||
/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
|
||||
/// digit used in the ModR/M byte (SSE2).
|
||||
pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73];
|
||||
|
||||
/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];
|
||||
|
||||
/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2];
|
||||
|
||||
/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3];
|
||||
|
||||
/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1];
|
||||
|
||||
/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2];
|
||||
|
||||
/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3];
|
||||
|
||||
/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
|
||||
pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1];
|
||||
|
||||
/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
|
||||
pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2];
|
||||
|
||||
/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2).
|
||||
pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8];
|
||||
|
||||
/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2).
|
||||
pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9];
|
||||
|
||||
/// Subtract packed doubleword integers in xmm2/m128 from doubleword byte integers in xmm1 (SSE2).
|
||||
pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa];
|
||||
|
||||
/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2).
|
||||
pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb];
|
||||
|
||||
/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1
|
||||
/// and saturate results (SSE2).
|
||||
pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8];
|
||||
|
||||
/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1
|
||||
/// and saturate results (SSE2).
|
||||
pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9];
|
||||
|
||||
/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1
|
||||
/// and saturate results (SSE2).
|
||||
pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
|
||||
|
||||
/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1
|
||||
/// and saturate results (SSE2).
|
||||
pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
|
||||
|
||||
/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
|
||||
/// 0s (SSE4.1).
|
||||
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
|
||||
|
||||
/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68];
|
||||
|
||||
/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69];
|
||||
|
||||
/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A];
|
||||
|
||||
/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D];
|
||||
|
||||
/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60];
|
||||
|
||||
/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61];
|
||||
|
||||
/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62];
|
||||
|
||||
/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C];
|
||||
|
||||
/// Push r{16,32,64}.
|
||||
pub static PUSH_REG: [u8; 1] = [0x50];
|
||||
|
||||
/// Logical exclusive OR (SSE2).
|
||||
pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef];
|
||||
|
||||
/// Near return to calling procedure.
|
||||
pub static RET_NEAR: [u8; 1] = [0xc3];
|
||||
|
||||
/// General rotation opcode. Kind of rotation depends on encoding.
|
||||
pub static ROTATE_CL: [u8; 1] = [0xd3];
|
||||
|
||||
/// General rotation opcode. Kind of rotation depends on encoding.
|
||||
pub static ROTATE_IMM8: [u8; 1] = [0xc1];
|
||||
|
||||
/// Round scalar doubl-precision floating-point values.
|
||||
pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b];
|
||||
|
||||
/// Round scalar single-precision floating-point values.
|
||||
pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a];
|
||||
|
||||
/// Subtract with borrow r{16,32,64} from r/m of the same size.
|
||||
pub static SBB: [u8; 1] = [0x19];
|
||||
|
||||
/// Set byte if overflow (OF=1).
|
||||
pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90];
|
||||
|
||||
/// Compute the square root of the packed double-precision floating-point values and store the
|
||||
/// result in xmm1 (SSE2).
|
||||
pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51];
|
||||
|
||||
/// Compute the square root of the packed double-precision floating-point values and store the
|
||||
/// result in xmm1 (SSE).
|
||||
pub static SQRTPS: [u8; 2] = [0x0f, 0x51];
|
||||
|
||||
/// Compute square root of scalar double-precision floating-point value.
|
||||
pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51];
|
||||
|
||||
/// Compute square root of scalar single-precision value.
|
||||
pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51];
|
||||
|
||||
/// Subtract r{16,32,64} from r/m of same size.
|
||||
pub static SUB: [u8; 1] = [0x29];
|
||||
|
||||
/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result
|
||||
/// in xmm1 (SSE2).
|
||||
pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c];
|
||||
|
||||
/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result
|
||||
/// in xmm1 (SSE).
|
||||
pub static SUBPS: [u8; 2] = [0x0f, 0x5c];
|
||||
|
||||
/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1
|
||||
/// and store the result in xmm1.
|
||||
pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c];
|
||||
|
||||
/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1
|
||||
/// and store the result in xmm1.
|
||||
pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c];
|
||||
|
||||
/// AND r8 with r/m8; set SF, ZF, PF according to result.
|
||||
pub static TEST_BYTE_REG: [u8; 1] = [0x84];
|
||||
|
||||
/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result.
|
||||
pub static TEST_REG: [u8; 1] = [0x85];
|
||||
|
||||
/// Count the number of trailing zero bits.
|
||||
pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc];
|
||||
|
||||
/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64
|
||||
/// and set the EFLAGS flags accordingly.
|
||||
pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e];
|
||||
|
||||
/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32
|
||||
/// and set the EFLAGS flags accordingly.
|
||||
pub static UCOMISS: [u8; 2] = [0x0f, 0x2e];
|
||||
|
||||
/// Raise invalid opcode instruction.
|
||||
pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b];
|
||||
|
||||
/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed
|
||||
/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior
|
||||
/// is controlled by MXCSR but can be overriden by EVEX.L'L in static rounding mode
|
||||
/// (AVX512VL, AVX512F).
|
||||
pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a];
|
||||
|
||||
/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended.
|
||||
pub static XOR_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// r/m{16,32,64} XOR sign-extended imm8.
|
||||
pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
|
||||
|
||||
/// r/m{16,32,64} XOR register of the same size.
|
||||
pub static XOR: [u8; 1] = [0x31];
|
||||
|
||||
/// Bitwise logical XOR of packed double-precision floating-point values.
|
||||
pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57];
|
||||
|
||||
/// Bitwise logical XOR of packed single-precision floating-point values.
|
||||
pub static XORPS: [u8; 2] = [0x0f, 0x57];
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,43 +0,0 @@
|
||||
use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder};
|
||||
|
||||
pub(crate) fn define() -> IsaRegs {
|
||||
let mut regs = IsaRegsBuilder::new();
|
||||
|
||||
let builder = RegBankBuilder::new("FloatRegs", "xmm")
|
||||
.units(16)
|
||||
.track_pressure(true);
|
||||
let float_regs = regs.add_bank(builder);
|
||||
|
||||
let builder = RegBankBuilder::new("IntRegs", "r")
|
||||
.units(16)
|
||||
.names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"])
|
||||
.track_pressure(true)
|
||||
.pinned_reg(15);
|
||||
let int_regs = regs.add_bank(builder);
|
||||
|
||||
let builder = RegBankBuilder::new("FlagRegs", "")
|
||||
.units(1)
|
||||
.names(vec!["rflags"])
|
||||
.track_pressure(false);
|
||||
let flag_reg = regs.add_bank(builder);
|
||||
|
||||
let builder = RegClassBuilder::new_toplevel("GPR", int_regs);
|
||||
let gpr = regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::new_toplevel("FPR", float_regs);
|
||||
let fpr = regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg);
|
||||
regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8);
|
||||
let gpr8 = regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4);
|
||||
regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8);
|
||||
regs.add_class(builder);
|
||||
|
||||
regs.build()
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
//! Shared ISA-specific definitions.
|
||||
|
||||
pub mod x86;
|
||||
@@ -1,419 +0,0 @@
|
||||
//! Provides a named interface to the `u16` Encoding bits.
|
||||
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
/// Named interface to the `u16` Encoding bits, representing an opcode.
|
||||
///
|
||||
/// Cranelift requires each recipe to have a single encoding size in bytes.
|
||||
/// X86 opcodes are variable length, so we use separate recipes for different
|
||||
/// styles of opcodes and prefixes. The opcode format is indicated by the
|
||||
/// recipe name prefix.
|
||||
///
|
||||
/// VEX/XOP and EVEX prefixes are not yet supported.
|
||||
/// Encodings using any of these prefixes are represented by separate recipes.
|
||||
///
|
||||
/// The encoding bits are:
|
||||
///
|
||||
/// 0-7: The opcode byte <op>.
|
||||
/// 8-9: pp, mandatory prefix:
|
||||
/// 00: none (Op*)
|
||||
/// 01: 66 (Mp*)
|
||||
/// 10: F3 (Mp*)
|
||||
/// 11: F2 (Mp*)
|
||||
/// 10-11: mm, opcode map:
|
||||
/// 00: <op> (Op1/Mp1)
|
||||
/// 01: 0F <op> (Op2/Mp2)
|
||||
/// 10: 0F 38 <op> (Op3/Mp3)
|
||||
/// 11: 0F 3A <op> (Op3/Mp3)
|
||||
/// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes.
|
||||
/// 15: REX.W bit (or VEX.W/E)
|
||||
#[derive(Copy, Clone, PartialEq)]
|
||||
pub struct EncodingBits(u16);
|
||||
const OPCODE: RangeInclusive<u16> = 0..=7;
|
||||
const OPCODE_PREFIX: RangeInclusive<u16> = 8..=11; // Includes pp and mm.
|
||||
const RRR: RangeInclusive<u16> = 12..=14;
|
||||
const REX_W: RangeInclusive<u16> = 15..=15;
|
||||
|
||||
impl From<u16> for EncodingBits {
|
||||
fn from(bits: u16) -> Self {
|
||||
Self(bits)
|
||||
}
|
||||
}
|
||||
|
||||
impl EncodingBits {
|
||||
/// Constructs a new EncodingBits from parts.
|
||||
pub fn new(op_bytes: &[u8], rrr: u16, rex_w: u16) -> Self {
|
||||
assert!(
|
||||
!op_bytes.is_empty(),
|
||||
"op_bytes must include at least one opcode byte"
|
||||
);
|
||||
let mut new = Self::from(0);
|
||||
let last_byte = op_bytes[op_bytes.len() - 1];
|
||||
new.write(OPCODE, last_byte as u16);
|
||||
let prefix: u8 = OpcodePrefix::from_opcode(op_bytes).into();
|
||||
new.write(OPCODE_PREFIX, prefix as u16);
|
||||
new.write(RRR, rrr);
|
||||
new.write(REX_W, rex_w);
|
||||
new
|
||||
}
|
||||
|
||||
/// Returns a copy of the EncodingBits with the RRR bits set.
|
||||
#[inline]
|
||||
pub fn with_rrr(mut self, rrr: u8) -> Self {
|
||||
debug_assert_eq!(self.rrr(), 0);
|
||||
self.write(RRR, rrr.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns a copy of the EncodingBits with the REX.W bit set.
|
||||
#[inline]
|
||||
pub fn with_rex_w(mut self) -> Self {
|
||||
debug_assert_eq!(self.rex_w(), 0);
|
||||
self.write(REX_W, 1);
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns the raw bits.
|
||||
#[inline]
|
||||
pub fn bits(self) -> u16 {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Convenience method for writing bits to specific range.
|
||||
#[inline]
|
||||
fn write(&mut self, range: RangeInclusive<u16>, value: u16) {
|
||||
assert!(ExactSizeIterator::len(&range) > 0);
|
||||
let size = range.end() - range.start() + 1; // Calculate the number of bits in the range.
|
||||
let mask = (1 << size) - 1; // Generate a bit mask.
|
||||
debug_assert!(
|
||||
value <= mask,
|
||||
"The written value should have fewer than {} bits.",
|
||||
size
|
||||
);
|
||||
let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask.
|
||||
self.0 &= mask_complement; // Clear the bits in `range`.
|
||||
let value = (value & mask) << *range.start(); // Place the value in the correct location.
|
||||
self.0 |= value; // Modify the bits in `range`.
|
||||
}
|
||||
|
||||
/// Convenience method for reading bits from a specific range.
|
||||
#[inline]
|
||||
fn read(self, range: RangeInclusive<u16>) -> u8 {
|
||||
assert!(ExactSizeIterator::len(&range) > 0);
|
||||
let size = range.end() - range.start() + 1; // Calculate the number of bits in the range.
|
||||
debug_assert!(size <= 8, "This structure expects ranges of at most 8 bits");
|
||||
let mask = (1 << size) - 1; // Generate a bit mask.
|
||||
((self.0 >> *range.start()) & mask) as u8
|
||||
}
|
||||
|
||||
/// Instruction opcode byte, without the prefix.
|
||||
#[inline]
|
||||
pub fn opcode_byte(self) -> u8 {
|
||||
self.read(OPCODE)
|
||||
}
|
||||
|
||||
/// Prefix kind for the instruction, as an enum.
|
||||
#[inline]
|
||||
pub fn prefix(self) -> OpcodePrefix {
|
||||
OpcodePrefix::from(self.read(OPCODE_PREFIX))
|
||||
}
|
||||
|
||||
/// Extracts the PP bits of the OpcodePrefix.
|
||||
#[inline]
|
||||
pub fn pp(self) -> u8 {
|
||||
self.prefix().to_primitive() & 0x3
|
||||
}
|
||||
|
||||
/// Extracts the MM bits of the OpcodePrefix.
|
||||
#[inline]
|
||||
pub fn mm(self) -> u8 {
|
||||
(self.prefix().to_primitive() >> 2) & 0x3
|
||||
}
|
||||
|
||||
/// Bits for the ModR/M byte for certain opcodes.
|
||||
#[inline]
|
||||
pub fn rrr(self) -> u8 {
|
||||
self.read(RRR)
|
||||
}
|
||||
|
||||
/// REX.W bit (or VEX.W/E).
|
||||
#[inline]
|
||||
pub fn rex_w(self) -> u8 {
|
||||
self.read(REX_W)
|
||||
}
|
||||
}
|
||||
|
||||
/// Opcode prefix representation.
|
||||
///
|
||||
/// The prefix type occupies four of the EncodingBits.
|
||||
#[allow(non_camel_case_types)]
|
||||
#[allow(missing_docs)]
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
pub enum OpcodePrefix {
|
||||
Op1,
|
||||
Mp1_66,
|
||||
Mp1_f3,
|
||||
Mp1_f2,
|
||||
Op2_0f,
|
||||
Mp2_66_0f,
|
||||
Mp2_f3_0f,
|
||||
Mp2_f2_0f,
|
||||
Op3_0f_38,
|
||||
Mp3_66_0f_38,
|
||||
Mp3_f3_0f_38,
|
||||
Mp3_f2_0f_38,
|
||||
Op3_0f_3a,
|
||||
Mp3_66_0f_3a,
|
||||
Mp3_f3_0f_3a,
|
||||
Mp3_f2_0f_3a,
|
||||
}
|
||||
|
||||
impl From<u8> for OpcodePrefix {
|
||||
fn from(n: u8) -> Self {
|
||||
use OpcodePrefix::*;
|
||||
match n {
|
||||
0b0000 => Op1,
|
||||
0b0001 => Mp1_66,
|
||||
0b0010 => Mp1_f3,
|
||||
0b0011 => Mp1_f2,
|
||||
0b0100 => Op2_0f,
|
||||
0b0101 => Mp2_66_0f,
|
||||
0b0110 => Mp2_f3_0f,
|
||||
0b0111 => Mp2_f2_0f,
|
||||
0b1000 => Op3_0f_38,
|
||||
0b1001 => Mp3_66_0f_38,
|
||||
0b1010 => Mp3_f3_0f_38,
|
||||
0b1011 => Mp3_f2_0f_38,
|
||||
0b1100 => Op3_0f_3a,
|
||||
0b1101 => Mp3_66_0f_3a,
|
||||
0b1110 => Mp3_f3_0f_3a,
|
||||
0b1111 => Mp3_f2_0f_3a,
|
||||
_ => panic!("invalid opcode prefix"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Into<u8> for OpcodePrefix {
|
||||
fn into(self) -> u8 {
|
||||
use OpcodePrefix::*;
|
||||
match self {
|
||||
Op1 => 0b0000,
|
||||
Mp1_66 => 0b0001,
|
||||
Mp1_f3 => 0b0010,
|
||||
Mp1_f2 => 0b0011,
|
||||
Op2_0f => 0b0100,
|
||||
Mp2_66_0f => 0b0101,
|
||||
Mp2_f3_0f => 0b0110,
|
||||
Mp2_f2_0f => 0b0111,
|
||||
Op3_0f_38 => 0b1000,
|
||||
Mp3_66_0f_38 => 0b1001,
|
||||
Mp3_f3_0f_38 => 0b1010,
|
||||
Mp3_f2_0f_38 => 0b1011,
|
||||
Op3_0f_3a => 0b1100,
|
||||
Mp3_66_0f_3a => 0b1101,
|
||||
Mp3_f3_0f_3a => 0b1110,
|
||||
Mp3_f2_0f_3a => 0b1111,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl OpcodePrefix {
|
||||
/// Convert an opcode prefix to a `u8`; this is a convenience proxy for `Into<u8>`.
|
||||
fn to_primitive(self) -> u8 {
|
||||
self.into()
|
||||
}
|
||||
|
||||
/// Extracts the OpcodePrefix from the opcode.
|
||||
pub fn from_opcode(op_bytes: &[u8]) -> Self {
|
||||
assert!(!op_bytes.is_empty(), "at least one opcode byte");
|
||||
|
||||
let prefix_bytes = &op_bytes[..op_bytes.len() - 1];
|
||||
match prefix_bytes {
|
||||
[] => Self::Op1,
|
||||
[0x66] => Self::Mp1_66,
|
||||
[0xf3] => Self::Mp1_f3,
|
||||
[0xf2] => Self::Mp1_f2,
|
||||
[0x0f] => Self::Op2_0f,
|
||||
[0x66, 0x0f] => Self::Mp2_66_0f,
|
||||
[0xf3, 0x0f] => Self::Mp2_f3_0f,
|
||||
[0xf2, 0x0f] => Self::Mp2_f2_0f,
|
||||
[0x0f, 0x38] => Self::Op3_0f_38,
|
||||
[0x66, 0x0f, 0x38] => Self::Mp3_66_0f_38,
|
||||
[0xf3, 0x0f, 0x38] => Self::Mp3_f3_0f_38,
|
||||
[0xf2, 0x0f, 0x38] => Self::Mp3_f2_0f_38,
|
||||
[0x0f, 0x3a] => Self::Op3_0f_3a,
|
||||
[0x66, 0x0f, 0x3a] => Self::Mp3_66_0f_3a,
|
||||
[0xf3, 0x0f, 0x3a] => Self::Mp3_f3_0f_3a,
|
||||
[0xf2, 0x0f, 0x3a] => Self::Mp3_f2_0f_3a,
|
||||
_ => {
|
||||
panic!("unexpected opcode sequence: {:?}", op_bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the recipe name prefix.
|
||||
///
|
||||
/// At the moment, each similar OpcodePrefix group is given its own Recipe.
|
||||
/// In order to distinguish them, this string is prefixed.
|
||||
pub fn recipe_name_prefix(self) -> &'static str {
|
||||
use OpcodePrefix::*;
|
||||
match self {
|
||||
Op1 => "Op1",
|
||||
Op2_0f => "Op2",
|
||||
Op3_0f_38 | Op3_0f_3a => "Op3",
|
||||
Mp1_66 | Mp1_f3 | Mp1_f2 => "Mp1",
|
||||
Mp2_66_0f | Mp2_f3_0f | Mp2_f2_0f => "Mp2",
|
||||
Mp3_66_0f_38 | Mp3_f3_0f_38 | Mp3_f2_0f_38 => "Mp3",
|
||||
Mp3_66_0f_3a | Mp3_f3_0f_3a | Mp3_f2_0f_3a => "Mp3",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Helper function for prefix_roundtrip() to avoid long lines.
|
||||
fn test_roundtrip(p: OpcodePrefix) {
|
||||
assert_eq!(p, OpcodePrefix::from(p.to_primitive()));
|
||||
}
|
||||
|
||||
/// Tests that to/from each opcode matches.
|
||||
#[test]
|
||||
fn prefix_roundtrip() {
|
||||
test_roundtrip(OpcodePrefix::Op1);
|
||||
test_roundtrip(OpcodePrefix::Mp1_66);
|
||||
test_roundtrip(OpcodePrefix::Mp1_f3);
|
||||
test_roundtrip(OpcodePrefix::Mp1_f2);
|
||||
test_roundtrip(OpcodePrefix::Op2_0f);
|
||||
test_roundtrip(OpcodePrefix::Mp2_66_0f);
|
||||
test_roundtrip(OpcodePrefix::Mp2_f3_0f);
|
||||
test_roundtrip(OpcodePrefix::Mp2_f2_0f);
|
||||
test_roundtrip(OpcodePrefix::Op3_0f_38);
|
||||
test_roundtrip(OpcodePrefix::Mp3_66_0f_38);
|
||||
test_roundtrip(OpcodePrefix::Mp3_f3_0f_38);
|
||||
test_roundtrip(OpcodePrefix::Mp3_f2_0f_38);
|
||||
test_roundtrip(OpcodePrefix::Op3_0f_3a);
|
||||
test_roundtrip(OpcodePrefix::Mp3_66_0f_3a);
|
||||
test_roundtrip(OpcodePrefix::Mp3_f3_0f_3a);
|
||||
test_roundtrip(OpcodePrefix::Mp3_f2_0f_3a);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prefix_to_name() {
|
||||
assert_eq!(OpcodePrefix::Op1.recipe_name_prefix(), "Op1");
|
||||
assert_eq!(OpcodePrefix::Op2_0f.recipe_name_prefix(), "Op2");
|
||||
assert_eq!(OpcodePrefix::Op3_0f_38.recipe_name_prefix(), "Op3");
|
||||
assert_eq!(OpcodePrefix::Mp1_66.recipe_name_prefix(), "Mp1");
|
||||
assert_eq!(OpcodePrefix::Mp2_66_0f.recipe_name_prefix(), "Mp2");
|
||||
assert_eq!(OpcodePrefix::Mp3_66_0f_3a.recipe_name_prefix(), "Mp3");
|
||||
}
|
||||
|
||||
/// Tests that the opcode_byte is the lower of the EncodingBits.
|
||||
#[test]
|
||||
fn encodingbits_opcode_byte() {
|
||||
let enc = EncodingBits::from(0x00ff);
|
||||
assert_eq!(enc.opcode_byte(), 0xff);
|
||||
assert_eq!(enc.prefix().to_primitive(), 0x0);
|
||||
assert_eq!(enc.rrr(), 0x0);
|
||||
assert_eq!(enc.rex_w(), 0x0);
|
||||
|
||||
let enc = EncodingBits::from(0x00cd);
|
||||
assert_eq!(enc.opcode_byte(), 0xcd);
|
||||
}
|
||||
|
||||
/// Tests that the OpcodePrefix is encoded correctly.
|
||||
#[test]
|
||||
fn encodingbits_prefix() {
|
||||
let enc = EncodingBits::from(0x0c00);
|
||||
assert_eq!(enc.opcode_byte(), 0x00);
|
||||
assert_eq!(enc.prefix().to_primitive(), 0xc);
|
||||
assert_eq!(enc.prefix(), OpcodePrefix::Op3_0f_3a);
|
||||
assert_eq!(enc.rrr(), 0x0);
|
||||
assert_eq!(enc.rex_w(), 0x0);
|
||||
}
|
||||
|
||||
/// Tests that the PP bits are encoded correctly.
|
||||
#[test]
|
||||
fn encodingbits_pp() {
|
||||
let enc = EncodingBits::from(0x0300);
|
||||
assert_eq!(enc.opcode_byte(), 0x0);
|
||||
assert_eq!(enc.pp(), 0x3);
|
||||
assert_eq!(enc.mm(), 0x0);
|
||||
assert_eq!(enc.rrr(), 0x0);
|
||||
assert_eq!(enc.rex_w(), 0x0);
|
||||
}
|
||||
|
||||
/// Tests that the MM bits are encoded correctly.
|
||||
#[test]
|
||||
fn encodingbits_mm() {
|
||||
let enc = EncodingBits::from(0x0c00);
|
||||
assert_eq!(enc.opcode_byte(), 0x0);
|
||||
assert_eq!(enc.pp(), 0x00);
|
||||
assert_eq!(enc.mm(), 0x3);
|
||||
assert_eq!(enc.rrr(), 0x0);
|
||||
assert_eq!(enc.rex_w(), 0x0);
|
||||
}
|
||||
|
||||
/// Tests that the ModR/M bits are encoded correctly.
|
||||
#[test]
|
||||
fn encodingbits_rrr() {
|
||||
let enc = EncodingBits::from(0x5000);
|
||||
assert_eq!(enc.opcode_byte(), 0x0);
|
||||
assert_eq!(enc.prefix().to_primitive(), 0x0);
|
||||
assert_eq!(enc.rrr(), 0x5);
|
||||
assert_eq!(enc.rex_w(), 0x0);
|
||||
}
|
||||
|
||||
/// Tests that the REX.W bit is encoded correctly.
|
||||
#[test]
|
||||
fn encodingbits_rex_w() {
|
||||
let enc = EncodingBits::from(0x8000);
|
||||
assert_eq!(enc.opcode_byte(), 0x00);
|
||||
assert_eq!(enc.prefix().to_primitive(), 0x0);
|
||||
assert_eq!(enc.rrr(), 0x0);
|
||||
assert_eq!(enc.rex_w(), 0x1);
|
||||
}
|
||||
|
||||
/// Tests setting and unsetting a bit using EncodingBits::write.
|
||||
#[test]
|
||||
fn encodingbits_flip() {
|
||||
let mut bits = EncodingBits::from(0);
|
||||
let range = 2..=2;
|
||||
|
||||
bits.write(range.clone(), 1);
|
||||
assert_eq!(bits.bits(), 0b100);
|
||||
|
||||
bits.write(range, 0);
|
||||
assert_eq!(bits.bits(), 0b000);
|
||||
}
|
||||
|
||||
/// Tests a round-trip of EncodingBits from/to a u16 (hardcoded endianness).
|
||||
#[test]
|
||||
fn encodingbits_roundtrip() {
|
||||
let bits: u16 = 0x1234;
|
||||
assert_eq!(EncodingBits::from(bits).bits(), bits);
|
||||
}
|
||||
|
||||
#[test]
|
||||
// I purposely want to divide the bits using the ranges defined above.
|
||||
#[allow(clippy::inconsistent_digit_grouping)]
|
||||
fn encodingbits_construction() {
|
||||
assert_eq!(
|
||||
EncodingBits::new(&[0x66, 0x40], 5, 1).bits(),
|
||||
0b1_101_0001_01000000 // 1 = rex_w, 101 = rrr, 0001 = prefix, 01000000 = opcode
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn encodingbits_panics_at_write_to_invalid_range() {
|
||||
EncodingBits::from(0).write(1..=0, 42);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[should_panic]
|
||||
fn encodingbits_panics_at_read_to_invalid_range() {
|
||||
EncodingBits::from(0).read(1..=0);
|
||||
}
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
//! Shared x86-specific definitions.
|
||||
|
||||
mod encoding_bits;
|
||||
pub use encoding_bits::*;
|
||||
@@ -22,7 +22,6 @@
|
||||
pub mod condcodes;
|
||||
pub mod constant_hash;
|
||||
pub mod constants;
|
||||
pub mod isa;
|
||||
|
||||
/// Version number of this crate.
|
||||
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
@@ -3565,45 +3565,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
||||
}
|
||||
|
||||
#[cfg(feature = "x86")]
|
||||
Opcode::X86Udivmodx
|
||||
| Opcode::X86Sdivmodx
|
||||
| Opcode::X86Umulx
|
||||
| Opcode::X86Smulx
|
||||
| Opcode::X86Cvtt2si
|
||||
| Opcode::X86Fmin
|
||||
| Opcode::X86Fmax
|
||||
| Opcode::X86Push
|
||||
| Opcode::X86Pop
|
||||
| Opcode::X86Bsr
|
||||
| Opcode::X86Bsf
|
||||
| Opcode::X86Pblendw
|
||||
| Opcode::X86Pshufd
|
||||
| Opcode::X86Pshufb
|
||||
| Opcode::X86Pextr
|
||||
| Opcode::X86Pinsr
|
||||
| Opcode::X86Insertps
|
||||
| Opcode::X86Movsd
|
||||
| Opcode::X86Movlhps
|
||||
| Opcode::X86Palignr
|
||||
| Opcode::X86Psll
|
||||
| Opcode::X86Psrl
|
||||
| Opcode::X86Psra
|
||||
| Opcode::X86Ptest
|
||||
| Opcode::X86Pmaxs
|
||||
| Opcode::X86Pmaxu
|
||||
| Opcode::X86Pmins
|
||||
| Opcode::X86Pminu
|
||||
| Opcode::X86Pmullq
|
||||
| Opcode::X86Pmuludq
|
||||
| Opcode::X86Punpckh
|
||||
| Opcode::X86Punpckl
|
||||
| Opcode::X86Vcvtudq2ps
|
||||
| Opcode::X86ElfTlsGetAddr
|
||||
| Opcode::X86MachoTlsGetAddr => {
|
||||
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
||||
}
|
||||
|
||||
Opcode::DummySargT => unreachable!(),
|
||||
|
||||
Opcode::Iabs => {
|
||||
|
||||
@@ -1,12 +1,4 @@
|
||||
//! Legacy ("old-style") backends that will be removed in the future.
|
||||
|
||||
// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both
|
||||
// included whenever building with x86 support. The new backend is the default,
|
||||
// but the old can be requested with `BackendVariant::Legacy`. However, if this
|
||||
// crate is built with the `old-x86-backend` feature, then the old backend is
|
||||
// default instead.
|
||||
#[cfg(feature = "x86")]
|
||||
pub(crate) mod x86;
|
||||
|
||||
#[cfg(feature = "riscv")]
|
||||
pub(crate) mod riscv;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,578 +0,0 @@
|
||||
//! Emitting binary x86 machine code.
|
||||
|
||||
use super::enc_tables::{needs_offset, needs_sib_byte};
|
||||
use super::registers::RU;
|
||||
use crate::binemit::{bad_encoding, CodeSink, Reloc};
|
||||
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
|
||||
use crate::ir::{
|
||||
Block, Constant, ExternalName, Function, Inst, InstructionData, JumpTable, LibCall, Opcode,
|
||||
TrapCode,
|
||||
};
|
||||
use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa};
|
||||
use crate::regalloc::RegDiversions;
|
||||
use cranelift_codegen_shared::isa::x86::EncodingBits;
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
|
||||
|
||||
// Convert a stack base to the corresponding register.
|
||||
fn stk_base(base: StackBase) -> RegUnit {
|
||||
let ru = match base {
|
||||
StackBase::SP => RU::rsp,
|
||||
StackBase::FP => RU::rbp,
|
||||
StackBase::Zone => unimplemented!(),
|
||||
};
|
||||
ru as RegUnit
|
||||
}
|
||||
|
||||
// Mandatory prefix bytes for Mp* opcodes.
|
||||
const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];
|
||||
|
||||
// Second byte for three-byte opcodes for mm=0b10 and mm=0b11.
|
||||
const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];
|
||||
|
||||
// A REX prefix with no bits set: 0b0100WRXB.
|
||||
const BASE_REX: u8 = 0b0100_0000;
|
||||
|
||||
// Create a single-register REX prefix, setting the B bit to bit 3 of the register.
|
||||
// This is used for instructions that encode a register in the low 3 bits of the opcode and for
|
||||
// instructions that use the ModR/M `reg` field for something else.
|
||||
fn rex1(reg_b: RegUnit) -> u8 {
|
||||
let b = ((reg_b >> 3) & 1) as u8;
|
||||
BASE_REX | b
|
||||
}
|
||||
|
||||
// Create a dual-register REX prefix, setting:
|
||||
//
|
||||
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
|
||||
// REX.R = bit 3 of reg register.
|
||||
fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
|
||||
let b = ((rm >> 3) & 1) as u8;
|
||||
let r = ((reg >> 3) & 1) as u8;
|
||||
BASE_REX | b | (r << 2)
|
||||
}
|
||||
|
||||
// Create a three-register REX prefix, setting:
|
||||
//
|
||||
// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
|
||||
// REX.R = bit 3 of reg register.
|
||||
// REX.X = bit 3 of SIB index register.
|
||||
fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
|
||||
let b = ((rm >> 3) & 1) as u8;
|
||||
let r = ((reg >> 3) & 1) as u8;
|
||||
let x = ((index >> 3) & 1) as u8;
|
||||
BASE_REX | b | (x << 1) | (r << 2)
|
||||
}
|
||||
|
||||
/// Encode the RXBR' bits of the EVEX P0 byte. For an explanation of these bits, see section 2.6.1
|
||||
/// in the Intel Software Development Manual, volume 2A. These bits can be used by different
|
||||
/// addressing modes (see section 2.6.2), requiring different `vex*` functions than this one.
|
||||
fn evex2(rm: RegUnit, reg: RegUnit) -> u8 {
|
||||
let b = (!(rm >> 3) & 1) as u8;
|
||||
let x = (!(rm >> 4) & 1) as u8;
|
||||
let r = (!(reg >> 3) & 1) as u8;
|
||||
let r_ = (!(reg >> 4) & 1) as u8;
|
||||
0x00 | r_ | (b << 1) | (x << 2) | (r << 3)
|
||||
}
|
||||
|
||||
/// Determines whether a REX prefix should be emitted. A REX byte always has 0100 in bits 7:4; bits
|
||||
/// 3:0 correspond to WRXB. W allows certain instructions to declare a 64-bit operand size; because
|
||||
/// [needs_rex] is only used by [infer_rex] and we prevent [infer_rex] from using [w] in
|
||||
/// [Template::build], we do not need to check again whether [w] forces an inferred REX prefix--it
|
||||
/// always does and should be encoded like `.rex().w()`. The RXB are extension of ModR/M or SIB
|
||||
/// fields; see section 2.2.1.2 in the Intel Software Development Manual.
|
||||
#[inline]
|
||||
fn needs_rex(rex: u8) -> bool {
|
||||
rex != BASE_REX
|
||||
}
|
||||
|
||||
// Emit a REX prefix.
|
||||
//
|
||||
// The R, X, and B bits are computed from registers using the functions above. The W bit is
|
||||
// extracted from `bits`.
|
||||
fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(rex & 0xf8, BASE_REX);
|
||||
let w = EncodingBits::from(bits).rex_w();
|
||||
sink.put1(rex | (w << 3));
|
||||
}
|
||||
|
||||
// Emit a single-byte opcode with no REX prefix.
|
||||
fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit a single-byte opcode with REX prefix.
|
||||
fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Emit a single-byte opcode with inferred REX prefix.
|
||||
fn put_dynrexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for DynRexOp1*");
|
||||
if needs_rex(rex) {
|
||||
rex_prefix(bits, rex, sink);
|
||||
}
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode: 0F XX
|
||||
fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode: 0F XX with REX prefix.
|
||||
fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Emit two-byte opcode: 0F XX with inferred REX prefix.
|
||||
fn put_dynrexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(
|
||||
bits & 0x0f00,
|
||||
0x0400,
|
||||
"Invalid encoding bits for DynRexOp2*"
|
||||
);
|
||||
if needs_rex(rex) {
|
||||
rex_prefix(bits, rex, sink);
|
||||
}
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix.
|
||||
fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
|
||||
let enc = EncodingBits::from(bits);
|
||||
sink.put1(PREFIX[(enc.pp() - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit single-byte opcode with mandatory prefix and REX.
|
||||
fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*");
|
||||
let enc = EncodingBits::from(bits);
|
||||
sink.put1(PREFIX[(enc.pp() - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode (0F XX) with mandatory prefix.
|
||||
fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
|
||||
let enc = EncodingBits::from(bits);
|
||||
sink.put1(PREFIX[(enc.pp() - 1) as usize]);
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
|
||||
fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*");
|
||||
let enc = EncodingBits::from(bits);
|
||||
sink.put1(PREFIX[(enc.pp() - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Emit two-byte opcode (0F XX) with mandatory prefix and inferred REX.
|
||||
fn put_dynrexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(
|
||||
bits & 0x0c00,
|
||||
0x0400,
|
||||
"Invalid encoding bits for DynRexMp2*"
|
||||
);
|
||||
let enc = EncodingBits::from(bits);
|
||||
sink.put1(PREFIX[(enc.pp() - 1) as usize]);
|
||||
if needs_rex(rex) {
|
||||
rex_prefix(bits, rex, sink);
|
||||
}
|
||||
sink.put1(0x0f);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
|
||||
fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
|
||||
debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
|
||||
let enc = EncodingBits::from(bits);
|
||||
sink.put1(PREFIX[(enc.pp() - 1) as usize]);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
|
||||
fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*");
|
||||
let enc = EncodingBits::from(bits);
|
||||
sink.put1(PREFIX[(enc.pp() - 1) as usize]);
|
||||
rex_prefix(bits, rex, sink);
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and an inferred REX prefix.
|
||||
fn put_dynrexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
|
||||
debug_assert_eq!(
|
||||
bits & 0x0800,
|
||||
0x0800,
|
||||
"Invalid encoding bits for DynRexMp3*"
|
||||
);
|
||||
let enc = EncodingBits::from(bits);
|
||||
sink.put1(PREFIX[(enc.pp() - 1) as usize]);
|
||||
if needs_rex(rex) {
|
||||
rex_prefix(bits, rex, sink);
|
||||
}
|
||||
sink.put1(0x0f);
|
||||
sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
|
||||
sink.put1(bits as u8);
|
||||
}
|
||||
|
||||
/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
|
||||
/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
|
||||
/// used together for certain classes of instructions; i.e., special care should be taken to ensure
|
||||
/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where
|
||||
/// opcodes can result in an #UD.
|
||||
#[allow(dead_code)]
|
||||
enum EvexContext {
|
||||
RoundingRegToRegFP {
|
||||
rc: EvexRoundingControl,
|
||||
},
|
||||
NoRoundingFP {
|
||||
sae: bool,
|
||||
length: EvexVectorLength,
|
||||
},
|
||||
MemoryOp {
|
||||
broadcast: bool,
|
||||
length: EvexVectorLength,
|
||||
},
|
||||
Other {
|
||||
length: EvexVectorLength,
|
||||
},
|
||||
}
|
||||
|
||||
impl EvexContext {
|
||||
/// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
|
||||
fn bits(&self) -> u8 {
|
||||
match self {
|
||||
Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1,
|
||||
Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1,
|
||||
Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1,
|
||||
Self::Other { length } => length.bits() << 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`.
|
||||
#[allow(dead_code)]
|
||||
enum EvexVectorLength {
|
||||
V128,
|
||||
V256,
|
||||
V512,
|
||||
}
|
||||
|
||||
impl EvexVectorLength {
|
||||
/// Encode the `L'` and `L` bits for merging with the P2 byte.
|
||||
fn bits(&self) -> u8 {
|
||||
match self {
|
||||
Self::V128 => 0b00,
|
||||
Self::V256 => 0b01,
|
||||
Self::V512 => 0b10,
|
||||
// 0b11 is reserved (#UD).
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`.
|
||||
#[allow(dead_code)]
|
||||
enum EvexRoundingControl {
|
||||
RNE,
|
||||
RD,
|
||||
RU,
|
||||
RZ,
|
||||
}
|
||||
|
||||
impl EvexRoundingControl {
|
||||
/// Encode the `L'` and `L` bits for merging with the P2 byte.
|
||||
fn bits(&self) -> u8 {
|
||||
match self {
|
||||
Self::RNE => 0b00,
|
||||
Self::RD => 0b01,
|
||||
Self::RU => 0b10,
|
||||
Self::RZ => 0b11,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel
|
||||
/// Software Development Manual, volume 2A.
|
||||
#[allow(dead_code)]
|
||||
enum EvexMasking {
|
||||
None,
|
||||
Merging { k: u8 },
|
||||
Zeroing { k: u8 },
|
||||
}
|
||||
|
||||
impl EvexMasking {
|
||||
/// Encode the `z` bit for merging with the P2 byte.
|
||||
fn z_bit(&self) -> u8 {
|
||||
match self {
|
||||
Self::None | Self::Merging { .. } => 0,
|
||||
Self::Zeroing { .. } => 1,
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode the `aaa` bits for merging with the P2 byte.
|
||||
fn aaa_bits(&self) -> u8 {
|
||||
match self {
|
||||
Self::None => 0b000,
|
||||
Self::Merging { k } | Self::Zeroing { k } => {
|
||||
debug_assert!(*k <= 7);
|
||||
*k
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode an EVEX prefix, including the instruction opcode. To match the current recipe
|
||||
/// convention, the ModR/M byte is written separately in the recipe. This EVEX encoding function
|
||||
/// only encodes the `reg` (operand 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are
|
||||
/// possible (see section 2.6.2, Intel Software Development Manual, volume 2A), requiring
|
||||
/// refactoring of this function or separate functions for each form (e.g. as for the REX prefix).
|
||||
fn put_evex<CS: CodeSink + ?Sized>(
|
||||
bits: u16,
|
||||
reg: RegUnit,
|
||||
vvvvv: RegUnit,
|
||||
rm: RegUnit,
|
||||
context: EvexContext,
|
||||
masking: EvexMasking,
|
||||
sink: &mut CS,
|
||||
) {
|
||||
let enc = EncodingBits::from(bits);
|
||||
|
||||
// EVEX prefix.
|
||||
sink.put1(0x62);
|
||||
|
||||
debug_assert!(enc.mm() < 0b100);
|
||||
let mut p0 = enc.mm() & 0b11;
|
||||
p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset
|
||||
sink.put1(p0);
|
||||
|
||||
let mut p1 = enc.pp() | 0b100; // bit 2 is always set
|
||||
p1 |= (!(vvvvv as u8) & 0b1111) << 3;
|
||||
p1 |= (enc.rex_w() & 0b1) << 7;
|
||||
sink.put1(p1);
|
||||
|
||||
let mut p2 = masking.aaa_bits();
|
||||
p2 |= (!(vvvvv as u8 >> 4) & 0b1) << 3;
|
||||
p2 |= context.bits() << 4;
|
||||
p2 |= masking.z_bit() << 7;
|
||||
sink.put1(p2);
|
||||
|
||||
// Opcode
|
||||
sink.put1(enc.opcode_byte());
|
||||
|
||||
// ModR/M byte placed in recipe
|
||||
}
|
||||
|
||||
/// Emit a ModR/M byte for reg-reg operands.
|
||||
fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b11000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a ModR/M byte where the reg bits are part of the opcode.
|
||||
fn modrm_r_bits<CS: CodeSink + ?Sized>(rm: RegUnit, bits: u16, sink: &mut CS) {
|
||||
let reg = (bits >> 12) as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b11000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset.
|
||||
/// Registers %rsp and %rbp are invalid for `rm`, %rsp indicates a SIB byte, and %rbp indicates an
|
||||
/// absolute immediate 32-bit address.
|
||||
fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b00000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 Mod/RM byte, with a rip-relative displacement in 64-bit mode. Effective address
|
||||
/// is calculated by adding displacement to 64-bit rip of next instruction. See intel Sw dev manual
|
||||
/// section 2.2.1.6.
|
||||
fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_rm(0b101, reg, sink)
|
||||
}
|
||||
|
||||
/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit
|
||||
/// displacement.
|
||||
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
|
||||
fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b01000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit
|
||||
/// displacement.
|
||||
/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
|
||||
fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
|
||||
let reg = reg as u8 & 7;
|
||||
let rm = rm as u8 & 7;
|
||||
let mut b = 0b10000000;
|
||||
b |= reg << 3;
|
||||
b |= rm;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
|
||||
fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_rm(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
|
||||
/// displacement are present.
|
||||
fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_disp8(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
|
||||
/// displacement are present.
|
||||
fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
|
||||
modrm_disp32(0b100, reg, sink);
|
||||
}
|
||||
|
||||
/// Emit a SIB byte with a base register and no scale+index.
|
||||
fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
|
||||
let base = base as u8 & 7;
|
||||
// SIB SS_III_BBB.
|
||||
let mut b = 0b00_100_000;
|
||||
b |= base;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Emit a SIB byte with a scale, base, and index.
|
||||
fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
|
||||
// SIB SS_III_BBB.
|
||||
debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
|
||||
let scale = scale & 3;
|
||||
let index = index as u8 & 7;
|
||||
let base = base as u8 & 7;
|
||||
let b: u8 = (scale << 6) | (index << 3) | base;
|
||||
sink.put1(b);
|
||||
}
|
||||
|
||||
/// Get the low 4 bits of an opcode for an integer condition code.
|
||||
///
|
||||
/// Add this offset to a base opcode for:
|
||||
///
|
||||
/// ---- 0x70: Short conditional branch.
|
||||
/// 0x0f 0x80: Long conditional branch.
|
||||
/// 0x0f 0x90: SetCC.
|
||||
///
|
||||
fn icc2opc(cond: IntCC) -> u16 {
|
||||
use crate::ir::condcodes::IntCC::*;
|
||||
match cond {
|
||||
Overflow => 0x0,
|
||||
NotOverflow => 0x1,
|
||||
UnsignedLessThan => 0x2,
|
||||
UnsignedGreaterThanOrEqual => 0x3,
|
||||
Equal => 0x4,
|
||||
NotEqual => 0x5,
|
||||
UnsignedLessThanOrEqual => 0x6,
|
||||
UnsignedGreaterThan => 0x7,
|
||||
// 0x8 = Sign.
|
||||
// 0x9 = !Sign.
|
||||
// 0xa = Parity even.
|
||||
// 0xb = Parity odd.
|
||||
SignedLessThan => 0xc,
|
||||
SignedGreaterThanOrEqual => 0xd,
|
||||
SignedLessThanOrEqual => 0xe,
|
||||
SignedGreaterThan => 0xf,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the low 4 bits of an opcode for a floating point condition code.
|
||||
///
|
||||
/// The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this:
|
||||
///
|
||||
/// ZPC OSA
|
||||
/// UN 111 000
|
||||
/// GT 000 000
|
||||
/// LT 001 000
|
||||
/// EQ 100 000
|
||||
///
|
||||
/// Not all floating point condition codes are supported.
|
||||
fn fcc2opc(cond: FloatCC) -> u16 {
|
||||
use crate::ir::condcodes::FloatCC::*;
|
||||
match cond {
|
||||
Ordered => 0xb, // EQ|LT|GT => *np (P=0)
|
||||
Unordered => 0xa, // UN => *p (P=1)
|
||||
OrderedNotEqual => 0x5, // LT|GT => *ne (Z=0),
|
||||
UnorderedOrEqual => 0x4, // UN|EQ => *e (Z=1)
|
||||
GreaterThan => 0x7, // GT => *a (C=0&Z=0)
|
||||
GreaterThanOrEqual => 0x3, // GT|EQ => *ae (C=0)
|
||||
UnorderedOrLessThan => 0x2, // UN|LT => *b (C=1)
|
||||
UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
|
||||
Equal | // EQ
|
||||
NotEqual | // UN|LT|GT
|
||||
LessThan | // LT
|
||||
LessThanOrEqual | // LT|EQ
|
||||
UnorderedOrGreaterThan | // UN|GT
|
||||
UnorderedOrGreaterThanOrEqual // UN|GT|EQ
|
||||
=> panic!("{} not supported", cond),
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit a single-byte branch displacement to `destination`.
|
||||
fn disp1<CS: CodeSink + ?Sized>(destination: Block, func: &Function, sink: &mut CS) {
|
||||
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);
|
||||
sink.put1(delta as u8);
|
||||
}
|
||||
|
||||
/// Emit a four-byte branch displacement to `destination`.
|
||||
fn disp4<CS: CodeSink + ?Sized>(destination: Block, func: &Function, sink: &mut CS) {
|
||||
let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
|
||||
sink.put4(delta);
|
||||
}
|
||||
|
||||
/// Emit a four-byte displacement to jump table `jt`.
|
||||
fn jt_disp4<CS: CodeSink + ?Sized>(jt: JumpTable, func: &Function, sink: &mut CS) {
|
||||
let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4);
|
||||
sink.put4(delta);
|
||||
sink.reloc_jt(Reloc::X86PCRelRodata4, jt);
|
||||
}
|
||||
|
||||
/// Emit a four-byte displacement to `constant`.
|
||||
fn const_disp4<CS: CodeSink + ?Sized>(constant: Constant, func: &Function, sink: &mut CS) {
|
||||
let offset = func.dfg.constants.get_offset(constant);
|
||||
let delta = offset.wrapping_sub(sink.offset() + 4);
|
||||
sink.put4(delta);
|
||||
sink.reloc_constant(Reloc::X86PCRelRodata4, offset);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,199 +0,0 @@
|
||||
//! x86 Instruction Set Architectures.
|
||||
|
||||
mod abi;
|
||||
mod binemit;
|
||||
mod enc_tables;
|
||||
mod registers;
|
||||
pub mod settings;
|
||||
#[cfg(feature = "unwind")]
|
||||
pub mod unwind;
|
||||
|
||||
use super::super::settings as shared_settings;
|
||||
#[cfg(feature = "testing_hooks")]
|
||||
use crate::binemit::CodeSink;
|
||||
use crate::binemit::{emit_function, MemoryCodeSink};
|
||||
use crate::ir;
|
||||
use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
|
||||
use crate::isa::Builder as IsaBuilder;
|
||||
#[cfg(feature = "unwind")]
|
||||
use crate::isa::{unwind::systemv::RegisterMappingError, RegUnit};
|
||||
use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
|
||||
use crate::regalloc;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::timing;
|
||||
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use core::hash::{Hash, Hasher};
|
||||
use target_lexicon::{PointerWidth, Triple};
|
||||
|
||||
// The legacy x86 `TargetIsa` implementation state.
#[allow(dead_code)]
struct Isa {
    // Target triple this ISA instance was built for.
    triple: Triple,
    // Flags shared by all backends.
    shared_flags: shared_settings::Flags,
    // x86-specific flags (CPU feature predicates).
    isa_flags: settings::Flags,
    // Level-1 encoding table, selected by pointer width in `isa_constructor`
    // (LEVEL1_I32 for 32-bit targets, LEVEL1_I64 for 64-bit targets).
    cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
}
|
||||
|
||||
/// Get an ISA builder for creating x86 targets.
|
||||
pub fn isa_builder(triple: Triple) -> IsaBuilder {
|
||||
IsaBuilder {
|
||||
triple,
|
||||
setup: settings::builder(),
|
||||
constructor: isa_constructor,
|
||||
}
|
||||
}
|
||||
|
||||
fn isa_constructor(
|
||||
triple: Triple,
|
||||
shared_flags: shared_settings::Flags,
|
||||
builder: shared_settings::Builder,
|
||||
) -> Box<dyn TargetIsa> {
|
||||
let level1 = match triple.pointer_width().unwrap() {
|
||||
PointerWidth::U16 => unimplemented!("x86-16"),
|
||||
PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
|
||||
PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
|
||||
};
|
||||
|
||||
let isa_flags = settings::Flags::new(&shared_flags, builder);
|
||||
|
||||
Box::new(Isa {
|
||||
triple,
|
||||
isa_flags,
|
||||
shared_flags,
|
||||
cpumode: level1,
|
||||
})
|
||||
}
|
||||
|
||||
/// Legacy x86 `TargetIsa` implementation. Most methods delegate to the
/// generated encoding tables (`enc_tables`), the ABI module (`abi`), or the
/// binary emitter (`binemit`).
impl TargetIsa for Isa {
    fn name(&self) -> &'static str {
        "x86"
    }

    fn triple(&self) -> &Triple {
        &self.triple
    }

    fn flags(&self) -> &shared_settings::Flags {
        &self.shared_flags
    }

    fn isa_flags(&self) -> Vec<shared_settings::Value> {
        self.isa_flags.iter().collect()
    }

    // Hash both the shared and the ISA-specific flag sets so anything keyed
    // on the combined hash invalidates when either changes.
    fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
        self.shared_flags.hash(&mut hasher);
        self.isa_flags.hash(&mut hasher);
    }

    fn uses_cpu_flags(&self) -> bool {
        true
    }

    fn uses_complex_addresses(&self) -> bool {
        true
    }

    fn register_info(&self) -> RegInfo {
        registers::INFO.clone()
    }

    // Map a Cranelift register unit to its DWARF register number.
    #[cfg(feature = "unwind")]
    fn map_dwarf_register(&self, reg: RegUnit) -> Result<u16, RegisterMappingError> {
        unwind::systemv::map_reg(self, reg).map(|r| r.0)
    }

    fn encoding_info(&self) -> EncInfo {
        enc_tables::INFO.clone()
    }

    // Look up all legal encodings for `inst` (with controlling type
    // `ctrl_typevar`) in the generated two-level encoding tables.
    fn legal_encodings<'a>(
        &'a self,
        func: &'a ir::Function,
        inst: &'a ir::InstructionData,
        ctrl_typevar: ir::Type,
    ) -> Encodings<'a> {
        lookup_enclist(
            ctrl_typevar,
            inst,
            func,
            self.cpumode,
            &enc_tables::LEVEL2[..],
            &enc_tables::ENCLISTS[..],
            &enc_tables::LEGALIZE_ACTIONS[..],
            &enc_tables::RECIPE_PREDICATES[..],
            &enc_tables::INST_PREDICATES[..],
            self.isa_flags.predicate_view(),
        )
    }

    fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
        abi::legalize_signature(
            sig,
            &self.triple,
            current,
            &self.shared_flags,
            &self.isa_flags,
        )
    }

    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
        abi::regclass_for_abi_type(ty)
    }

    fn allocatable_registers(&self, _func: &ir::Function) -> regalloc::RegisterSet {
        abi::allocatable_registers(&self.triple, &self.shared_flags)
    }

    // Test-only hook that emits a single instruction to an arbitrary sink.
    #[cfg(feature = "testing_hooks")]
    fn emit_inst(
        &self,
        func: &ir::Function,
        inst: ir::Inst,
        divert: &mut regalloc::RegDiversions,
        sink: &mut dyn CodeSink,
    ) {
        binemit::emit_inst(func, inst, divert, sink, self)
    }

    fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
        emit_function(func, binemit::emit_inst, sink, self)
    }

    fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
        // `_tt` records pass timing until it is dropped at end of scope.
        let _tt = timing::prologue_epilogue();
        abi::prologue_epilogue(func, self)
    }

    // NOTE(review): both overflow conditions map to UnsignedLessThan —
    // presumably because the relevant x86 flag after add/sub is CF, which
    // SetCC tests as "b" (unsigned less than). Confirm against binemit use.
    fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
        ir::condcodes::IntCC::UnsignedLessThan
    }

    fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
        ir::condcodes::IntCC::UnsignedLessThan
    }

    #[cfg(feature = "unwind")]
    fn create_unwind_info(
        &self,
        func: &ir::Function,
    ) -> CodegenResult<Option<super::super::unwind::UnwindInfo>> {
        abi::create_unwind_info(func, self)
    }

    #[cfg(feature = "unwind")]
    fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
        Some(unwind::systemv::create_cie())
    }

    fn as_any(&self) -> &dyn Any {
        self as &dyn Any
    }
}
|
||||
|
||||
impl fmt::Display for Isa {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
|
||||
}
|
||||
}
|
||||
@@ -1,86 +0,0 @@
|
||||
//! x86 register descriptions.
|
||||
|
||||
use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
|
||||
|
||||
include!(concat!(env!("OUT_DIR"), "/registers-x86.rs"));
|
||||
|
||||
// Unit tests for the generated x86 register descriptions (registers-x86.rs).
#[cfg(test)]
mod tests {
    use super::*;
    use crate::isa::RegUnit;
    use alloc::string::{String, ToString};

    // Parsing a register name must yield its hardware encoding number.
    #[test]
    fn unit_encodings() {
        fn gpr(unit: usize) -> Option<u16> {
            Some(GPR.unit(unit))
        }
        // The encoding of integer registers is not alphabetical.
        assert_eq!(INFO.parse_regunit("rax"), gpr(0));
        assert_eq!(INFO.parse_regunit("rbx"), gpr(3));
        assert_eq!(INFO.parse_regunit("rcx"), gpr(1));
        assert_eq!(INFO.parse_regunit("rdx"), gpr(2));
        assert_eq!(INFO.parse_regunit("rsi"), gpr(6));
        assert_eq!(INFO.parse_regunit("rdi"), gpr(7));
        assert_eq!(INFO.parse_regunit("rbp"), gpr(5));
        assert_eq!(INFO.parse_regunit("rsp"), gpr(4));
        assert_eq!(INFO.parse_regunit("r8"), gpr(8));
        assert_eq!(INFO.parse_regunit("r15"), gpr(15));

        fn fpr(unit: usize) -> Option<u16> {
            Some(FPR.unit(unit))
        }
        assert_eq!(INFO.parse_regunit("xmm0"), fpr(0));
        assert_eq!(INFO.parse_regunit("xmm15"), fpr(15));

        // FIXME(#1306) Add these tests back in when FPR32 is re-added.
        // fn fpr32(unit: usize) -> Option<u16> {
        //     Some(FPR32.unit(unit))
        // }
        // assert_eq!(INFO.parse_regunit("xmm0"), fpr32(0));
        // assert_eq!(INFO.parse_regunit("xmm31"), fpr32(31));
    }

    // Displaying a register unit must yield the canonical "%name" spelling.
    #[test]
    fn unit_names() {
        fn gpr(ru: RegUnit) -> String {
            INFO.display_regunit(GPR.first + ru).to_string()
        }
        assert_eq!(gpr(0), "%rax");
        assert_eq!(gpr(3), "%rbx");
        assert_eq!(gpr(1), "%rcx");
        assert_eq!(gpr(2), "%rdx");
        assert_eq!(gpr(6), "%rsi");
        assert_eq!(gpr(7), "%rdi");
        assert_eq!(gpr(5), "%rbp");
        assert_eq!(gpr(4), "%rsp");
        assert_eq!(gpr(8), "%r8");
        assert_eq!(gpr(15), "%r15");

        fn fpr(ru: RegUnit) -> String {
            INFO.display_regunit(FPR.first + ru).to_string()
        }
        assert_eq!(fpr(0), "%xmm0");
        assert_eq!(fpr(15), "%xmm15");

        // FIXME(#1306) Add these tests back in when FPR32 is re-added.
        // fn fpr32(ru: RegUnit) -> String {
        //     INFO.display_regunit(FPR32.first + ru).to_string()
        // }
        // assert_eq!(fpr32(0), "%xmm0");
        // assert_eq!(fpr32(31), "%xmm31");
    }

    // Register-class intersection: ABCD is a subclass of GPR, and the
    // integer and float banks are disjoint.
    #[test]
    fn regclasses() {
        assert_eq!(GPR.intersect_index(GPR), Some(GPR.into()));
        assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into()));
        assert_eq!(GPR.intersect_index(FPR), None);
        assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into()));
        assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into()));
        assert_eq!(ABCD.intersect_index(FPR), None);
        assert_eq!(FPR.intersect_index(FPR), Some(FPR.into()));
        assert_eq!(FPR.intersect_index(GPR), None);
        assert_eq!(FPR.intersect_index(ABCD), None);
    }
}
|
||||
@@ -1,52 +0,0 @@
|
||||
//! x86 Settings.
|
||||
|
||||
use crate::settings::{self, detail, Builder, Value};
|
||||
use core::fmt;
|
||||
|
||||
// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
|
||||
// public `Flags` struct with an impl for all of the settings defined in
|
||||
// `cranelift-codegen/meta/src/isa/x86/settings.rs`.
|
||||
include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
|
||||
|
||||
// Unit tests for the generated x86 settings (settings-x86.rs).
#[cfg(test)]
mod tests {
    use super::{builder, Flags};
    use crate::settings::{self, Configurable};

    // Enabling a CPU preset must imply the expected feature flags.
    #[test]
    fn presets() {
        let shared = settings::Flags::new(settings::builder());

        // Nehalem has SSE4.1 but not BMI1.
        let mut b0 = builder();
        b0.enable("nehalem").unwrap();
        let f0 = Flags::new(&shared, b0);
        assert_eq!(f0.has_sse41(), true);
        assert_eq!(f0.has_bmi1(), false);

        // Haswell has both.
        let mut b1 = builder();
        b1.enable("haswell").unwrap();
        let f1 = Flags::new(&shared, b1);
        assert_eq!(f1.has_sse41(), true);
        assert_eq!(f1.has_bmi1(), true);
    }
    #[test]
    fn display_presets() {
        // Spot check that the flags Display impl does not cause a panic
        let shared = settings::Flags::new(settings::builder());

        let b0 = builder();
        let f0 = Flags::new(&shared, b0);
        let _ = format!("{}", f0);

        let mut b1 = builder();
        b1.enable("nehalem").unwrap();
        let f1 = Flags::new(&shared, b1);
        let _ = format!("{}", f1);

        let mut b2 = builder();
        b2.enable("haswell").unwrap();
        let f2 = Flags::new(&shared, b2);
        let _ = format!("{}", f2);
    }
}
|
||||
@@ -1,531 +0,0 @@
|
||||
//! Module for x86 unwind generation for supported ABIs.
|
||||
|
||||
pub mod systemv;
|
||||
pub mod winx64;
|
||||
|
||||
use crate::ir::{Function, InstructionData, Opcode, ValueLoc};
|
||||
use crate::isa::x86::registers::{FPR, RU};
|
||||
use crate::isa::{RegUnit, TargetIsa};
|
||||
use crate::result::CodegenResult;
|
||||
use alloc::vec::Vec;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::isa::unwind::input::{UnwindCode, UnwindInfo};
|
||||
|
||||
/// Scan the compiled function's prologue and epilogues and translate the
/// stack-manipulating instructions into ISA-independent `UnwindCode`s.
///
/// Returns the collected prologue/epilogue codes together with the function
/// size and word size. Requires that `func.prologue_end` was filled in by the
/// prologue/epilogue pass (it `unwrap()`s otherwise).
pub(crate) fn create_unwind_info(
    func: &Function,
    isa: &dyn TargetIsa,
) -> CodegenResult<Option<UnwindInfo<RegUnit>>> {
    // Find last block based on max offset.
    let last_block = func
        .layout
        .blocks()
        .max_by_key(|b| func.offsets[*b])
        .expect("at least a block");
    // Find last instruction offset + size, and make it function size.
    let function_size = func
        .inst_offsets(last_block, &isa.encoding_info())
        .fold(0, |_, (offset, _, size)| offset + size);

    let entry_block = func.layout.entry_block().expect("missing entry block");
    let prologue_end = func.prologue_end.unwrap();
    // Invert (inst, block) pairs into a block -> first-epilogue-inst map.
    let epilogues_start = func
        .epilogues_start
        .iter()
        .map(|(i, b)| (*b, *i))
        .collect::<HashMap<_, _>>();

    let word_size = isa.pointer_bytes();

    // `stack_size` carries the most recent frame-size value (set by an
    // Iconst or AdjustSp*Imm) forward to the instruction that consumes it.
    let mut stack_size = None;
    let mut prologue_size = 0;
    let mut prologue_unwind_codes = Vec::new();
    let mut epilogues_unwind_codes = Vec::new();
    let mut frame_register: Option<RegUnit> = None;

    // Process only entry block and blocks with epilogues.
    let mut blocks = func
        .epilogues_start
        .iter()
        .map(|(_, b)| *b)
        .collect::<Vec<_>>();
    if !blocks.contains(&entry_block) {
        blocks.push(entry_block);
    }
    blocks.sort_by_key(|b| func.offsets[*b]);

    for block in blocks.iter() {
        let mut in_prologue = block == &entry_block;
        let mut in_epilogue = false;
        // Offsets of X86Pop instructions, matched later against the args of
        // the block's Return.
        let mut epilogue_pop_offsets = Vec::new();

        let epilogue_start = epilogues_start.get(block);
        let is_last_block = block == &last_block;

        for (offset, inst, size) in func.inst_offsets(*block, &isa.encoding_info()) {
            // Unwind codes are recorded at the offset *after* the instruction.
            let offset = offset + size;

            // Select which code list this instruction contributes to.
            let unwind_codes;
            if in_prologue {
                // Check for prologue end (inclusive)
                if prologue_end == inst {
                    in_prologue = false;
                }
                prologue_size += size;
                unwind_codes = &mut prologue_unwind_codes;
            } else if !in_epilogue && epilogue_start == Some(&inst) {
                // Now in an epilogue, emit a remember state instruction if not last block
                in_epilogue = true;

                epilogues_unwind_codes.push(Vec::new());
                unwind_codes = epilogues_unwind_codes.last_mut().unwrap();

                if !is_last_block {
                    unwind_codes.push((offset, UnwindCode::RememberState));
                }
            } else if in_epilogue {
                unwind_codes = epilogues_unwind_codes.last_mut().unwrap();
            } else {
                // Ignore normal instructions
                continue;
            }

            match func.dfg[inst] {
                InstructionData::Unary { opcode, arg } => {
                    match opcode {
                        // A push both allocates a word and saves a register.
                        Opcode::X86Push => {
                            let reg = func.locations[arg].unwrap_reg();
                            unwind_codes.push((
                                offset,
                                UnwindCode::StackAlloc {
                                    size: word_size.into(),
                                },
                            ));
                            unwind_codes.push((
                                offset,
                                UnwindCode::SaveRegister {
                                    reg,
                                    stack_offset: 0,
                                },
                            ));
                        }
                        Opcode::AdjustSpDown => {
                            let stack_size =
                                stack_size.expect("expected a previous stack size instruction");

                            // This is used when calling a stack check function
                            // We need to track the assignment to RAX which has the size of the stack
                            unwind_codes
                                .push((offset, UnwindCode::StackAlloc { size: stack_size }));
                        }
                        _ => {}
                    }
                }
                InstructionData::UnaryImm { opcode, imm } => {
                    match opcode {
                        Opcode::Iconst => {
                            let imm: i64 = imm.into();
                            assert!(imm <= core::u32::MAX as i64);
                            assert!(stack_size.is_none());

                            // This instruction should only appear in a prologue to pass an
                            // argument of the stack size to a stack check function.
                            // Record the stack size so we know what it is when we encounter the adjustment
                            // instruction (which will adjust via the register assigned to this instruction).
                            stack_size = Some(imm as u32);
                        }
                        Opcode::AdjustSpDownImm => {
                            let imm: i64 = imm.into();
                            assert!(imm <= core::u32::MAX as i64);

                            stack_size = Some(imm as u32);

                            unwind_codes
                                .push((offset, UnwindCode::StackAlloc { size: imm as u32 }));
                        }
                        Opcode::AdjustSpUpImm => {
                            let imm: i64 = imm.into();
                            assert!(imm <= core::u32::MAX as i64);

                            stack_size = Some(imm as u32);

                            unwind_codes
                                .push((offset, UnwindCode::StackDealloc { size: imm as u32 }));
                        }
                        _ => {}
                    }
                }
                InstructionData::Store {
                    opcode: Opcode::Store,
                    args: [arg1, arg2],
                    offset: stack_offset,
                    ..
                } => {
                    if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) =
                        (func.locations[arg1], func.locations[arg2])
                    {
                        // If this is a save of an FPR, record an unwind operation
                        // Note: the stack_offset here is relative to an adjusted SP
                        if dst == (RU::rsp as RegUnit) && FPR.contains(src) {
                            let stack_offset: i32 = stack_offset.into();
                            unwind_codes.push((
                                offset,
                                UnwindCode::SaveRegister {
                                    reg: src,
                                    stack_offset: stack_offset as u32,
                                },
                            ));
                        }
                    }
                }
                InstructionData::CopySpecial { src, dst, .. } if frame_register.is_none() => {
                    // Check for change in CFA register (RSP is always the starting CFA)
                    if src == (RU::rsp as RegUnit) {
                        unwind_codes.push((offset, UnwindCode::SetFramePointer { reg: dst }));
                        frame_register = Some(dst);
                    }
                }
                InstructionData::NullAry { opcode } => match opcode {
                    Opcode::X86Pop => {
                        // Defer emission until the Return tells us which
                        // value each pop restores.
                        epilogue_pop_offsets.push(offset);
                    }
                    _ => {}
                },
                InstructionData::MultiAry { opcode, .. } if in_epilogue => match opcode {
                    Opcode::Return => {
                        let args = func.dfg.inst_args(inst);
                        for (i, arg) in args.iter().rev().enumerate() {
                            // Only walk back the args for the pop instructions encountered
                            if i >= epilogue_pop_offsets.len() {
                                break;
                            }

                            let offset = epilogue_pop_offsets[i];

                            let reg = func.locations[*arg].unwrap_reg();
                            unwind_codes.push((offset, UnwindCode::RestoreRegister { reg }));
                            unwind_codes.push((
                                offset,
                                UnwindCode::StackDealloc {
                                    size: word_size.into(),
                                },
                            ));

                            if Some(reg) == frame_register {
                                unwind_codes.push((offset, UnwindCode::RestoreFramePointer));
                                // Keep frame_register assigned for next epilogue.
                            }
                        }
                        epilogue_pop_offsets.clear();

                        // TODO ensure unwind codes sorted by offsets ?

                        if !is_last_block {
                            unwind_codes.push((offset, UnwindCode::RestoreState));
                        }

                        in_epilogue = false;
                    }
                    _ => {}
                },
                _ => {}
            };
        }
    }

    Ok(Some(UnwindInfo {
        prologue_size,
        prologue_unwind_codes,
        epilogues_unwind_codes,
        function_size,
        word_size,
        initial_sp_offset: word_size,
    }))
}
|
||||
|
||||
// End-to-end tests: compile tiny functions with the legacy backend and check
// the exact unwind codes produced by `create_unwind_info`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::ir::{
        types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
    };
    use crate::isa::{lookup_variant, BackendVariant, CallConv};
    use crate::settings::{builder, Flags};
    use crate::Context;
    use std::str::FromStr;
    use target_lexicon::triple;

    // 64-byte frame: fits the small-allocation code path.
    #[test]
    fn test_small_alloc() {
        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
            .expect("expect x86 ISA")
            .finish(Flags::new(builder()));

        let mut context = Context::for_function(create_function(
            CallConv::WindowsFastcall,
            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
        ));

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

        assert_eq!(
            unwind,
            UnwindInfo {
                prologue_size: 9,
                prologue_unwind_codes: vec![
                    (2, UnwindCode::StackAlloc { size: 8 }),
                    (
                        2,
                        UnwindCode::SaveRegister {
                            reg: RU::rbp.into(),
                            stack_offset: 0,
                        }
                    ),
                    (
                        5,
                        UnwindCode::SetFramePointer {
                            reg: RU::rbp.into(),
                        }
                    ),
                    (9, UnwindCode::StackAlloc { size: 64 })
                ],
                epilogues_unwind_codes: vec![vec![
                    (13, UnwindCode::StackDealloc { size: 64 }),
                    (
                        15,
                        UnwindCode::RestoreRegister {
                            reg: RU::rbp.into()
                        }
                    ),
                    (15, UnwindCode::StackDealloc { size: 8 }),
                    (15, UnwindCode::RestoreFramePointer)
                ]],
                function_size: 16,
                word_size: 8,
                initial_sp_offset: 8,
            }
        );
    }

    // 10000-byte frame: exercises a larger stack adjustment sequence.
    #[test]
    fn test_medium_alloc() {
        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
            .expect("expect x86 ISA")
            .finish(Flags::new(builder()));

        let mut context = Context::for_function(create_function(
            CallConv::WindowsFastcall,
            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)),
        ));

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

        assert_eq!(
            unwind,
            UnwindInfo {
                prologue_size: 27,
                prologue_unwind_codes: vec![
                    (2, UnwindCode::StackAlloc { size: 8 }),
                    (
                        2,
                        UnwindCode::SaveRegister {
                            reg: RU::rbp.into(),
                            stack_offset: 0,
                        }
                    ),
                    (
                        5,
                        UnwindCode::SetFramePointer {
                            reg: RU::rbp.into(),
                        }
                    ),
                    (27, UnwindCode::StackAlloc { size: 10000 })
                ],
                epilogues_unwind_codes: vec![vec![
                    (34, UnwindCode::StackDealloc { size: 10000 }),
                    (
                        36,
                        UnwindCode::RestoreRegister {
                            reg: RU::rbp.into()
                        }
                    ),
                    (36, UnwindCode::StackDealloc { size: 8 }),
                    (36, UnwindCode::RestoreFramePointer)
                ]],
                function_size: 37,
                word_size: 8,
                initial_sp_offset: 8,
            }
        );
    }

    // 1 MB frame: exercises the stack-probe / large-allocation path.
    #[test]
    fn test_large_alloc() {
        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
            .expect("expect x86 ISA")
            .finish(Flags::new(builder()));

        let mut context = Context::for_function(create_function(
            CallConv::WindowsFastcall,
            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)),
        ));

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

        assert_eq!(
            unwind,
            UnwindInfo {
                prologue_size: 27,
                prologue_unwind_codes: vec![
                    (2, UnwindCode::StackAlloc { size: 8 }),
                    (
                        2,
                        UnwindCode::SaveRegister {
                            reg: RU::rbp.into(),
                            stack_offset: 0,
                        }
                    ),
                    (
                        5,
                        UnwindCode::SetFramePointer {
                            reg: RU::rbp.into(),
                        }
                    ),
                    (27, UnwindCode::StackAlloc { size: 1000000 })
                ],
                epilogues_unwind_codes: vec![vec![
                    (34, UnwindCode::StackDealloc { size: 1000000 }),
                    (
                        36,
                        UnwindCode::RestoreRegister {
                            reg: RU::rbp.into()
                        }
                    ),
                    (36, UnwindCode::StackDealloc { size: 8 }),
                    (36, UnwindCode::RestoreFramePointer)
                ]],
                function_size: 37,
                word_size: 8,
                initial_sp_offset: 8,
            }
        );
    }

    // Helper: a single-block function that just returns, with an optional
    // explicit stack slot to force a frame allocation.
    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
        let mut func =
            Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));

        let block0 = func.dfg.make_block();
        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(block0);
        pos.ins().return_(&[]);

        if let Some(stack_slot) = stack_slot {
            func.stack_slots.push(stack_slot);
        }

        func
    }

    // Two returns → two epilogues; the first must be bracketed with
    // RememberState/RestoreState.
    #[test]
    fn test_multi_return_func() {
        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
            .expect("expect x86 ISA")
            .finish(Flags::new(builder()));

        let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));

        context.compile(&*isa).expect("expected compilation");

        let unwind = create_unwind_info(&context.func, &*isa)
            .expect("can create unwind info")
            .expect("expected unwind info");

        assert_eq!(
            unwind,
            UnwindInfo {
                prologue_size: 5,
                prologue_unwind_codes: vec![
                    (2, UnwindCode::StackAlloc { size: 8 }),
                    (
                        2,
                        UnwindCode::SaveRegister {
                            reg: RU::rbp.into(),
                            stack_offset: 0,
                        }
                    ),
                    (
                        5,
                        UnwindCode::SetFramePointer {
                            reg: RU::rbp.into()
                        }
                    )
                ],
                epilogues_unwind_codes: vec![
                    vec![
                        (12, UnwindCode::RememberState),
                        (
                            12,
                            UnwindCode::RestoreRegister {
                                reg: RU::rbp.into()
                            }
                        ),
                        (12, UnwindCode::StackDealloc { size: 8 }),
                        (12, UnwindCode::RestoreFramePointer),
                        (13, UnwindCode::RestoreState)
                    ],
                    vec![
                        (
                            15,
                            UnwindCode::RestoreRegister {
                                reg: RU::rbp.into()
                            }
                        ),
                        (15, UnwindCode::StackDealloc { size: 8 }),
                        (15, UnwindCode::RestoreFramePointer)
                    ]
                ],
                function_size: 16,
                word_size: 8,
                initial_sp_offset: 8,
            }
        );
    }

    // Helper: a function with a conditional branch to two separate returning
    // blocks, producing two epilogues.
    fn create_multi_return_function(call_conv: CallConv) -> Function {
        let mut sig = Signature::new(call_conv);
        sig.params.push(AbiParam::new(types::I32));
        let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);

        let block0 = func.dfg.make_block();
        let v0 = func.dfg.append_block_param(block0, types::I32);
        let block1 = func.dfg.make_block();
        let block2 = func.dfg.make_block();

        let mut pos = FuncCursor::new(&mut func);
        pos.insert_block(block0);
        pos.ins().brnz(v0, block2, &[]);
        pos.ins().jump(block1, &[]);

        pos.insert_block(block1);
        pos.ins().return_(&[]);

        pos.insert_block(block2);
        pos.ins().return_(&[]);

        func
    }
}
|
||||
@@ -1,235 +0,0 @@
|
||||
//! Unwind information for System V ABI (x86-64).
|
||||
|
||||
use crate::ir::Function;
|
||||
use crate::isa::{
|
||||
unwind::systemv::{RegisterMappingError, UnwindInfo},
|
||||
RegUnit, TargetIsa,
|
||||
};
|
||||
use crate::result::CodegenResult;
|
||||
use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
|
||||
|
||||
/// Creates a new x86-64 common information entry (CIE).
|
||||
pub fn create_cie() -> CommonInformationEntry {
|
||||
use gimli::write::CallFrameInstruction;
|
||||
|
||||
let mut entry = CommonInformationEntry::new(
|
||||
Encoding {
|
||||
address_size: 8,
|
||||
format: Format::Dwarf32,
|
||||
version: 1,
|
||||
},
|
||||
1, // Code alignment factor
|
||||
-8, // Data alignment factor
|
||||
X86_64::RA,
|
||||
);
|
||||
|
||||
// Every frame will start with the call frame address (CFA) at RSP+8
|
||||
// It is +8 to account for the push of the return address by the call instruction
|
||||
entry.add_instruction(CallFrameInstruction::Cfa(X86_64::RSP, 8));
|
||||
|
||||
// Every frame will start with the return address at RSP (CFA-8 = RSP+8-8 = RSP)
|
||||
entry.add_instruction(CallFrameInstruction::Offset(X86_64::RA, -8));
|
||||
|
||||
entry
|
||||
}
|
||||
|
||||
/// Map Cranelift registers to their corresponding Gimli registers.
|
||||
pub fn map_reg(isa: &dyn TargetIsa, reg: RegUnit) -> Result<Register, RegisterMappingError> {
|
||||
if isa.name() != "x86" || isa.pointer_bits() != 64 {
|
||||
return Err(RegisterMappingError::UnsupportedArchitecture);
|
||||
}
|
||||
|
||||
// Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow
|
||||
const X86_GP_REG_MAP: [gimli::Register; 16] = [
|
||||
X86_64::RAX,
|
||||
X86_64::RCX,
|
||||
X86_64::RDX,
|
||||
X86_64::RBX,
|
||||
X86_64::RSP,
|
||||
X86_64::RBP,
|
||||
X86_64::RSI,
|
||||
X86_64::RDI,
|
||||
X86_64::R8,
|
||||
X86_64::R9,
|
||||
X86_64::R10,
|
||||
X86_64::R11,
|
||||
X86_64::R12,
|
||||
X86_64::R13,
|
||||
X86_64::R14,
|
||||
X86_64::R15,
|
||||
];
|
||||
const X86_XMM_REG_MAP: [gimli::Register; 16] = [
|
||||
X86_64::XMM0,
|
||||
X86_64::XMM1,
|
||||
X86_64::XMM2,
|
||||
X86_64::XMM3,
|
||||
X86_64::XMM4,
|
||||
X86_64::XMM5,
|
||||
X86_64::XMM6,
|
||||
X86_64::XMM7,
|
||||
X86_64::XMM8,
|
||||
X86_64::XMM9,
|
||||
X86_64::XMM10,
|
||||
X86_64::XMM11,
|
||||
X86_64::XMM12,
|
||||
X86_64::XMM13,
|
||||
X86_64::XMM14,
|
||||
X86_64::XMM15,
|
||||
];
|
||||
|
||||
let reg_info = isa.register_info();
|
||||
let bank = reg_info
|
||||
.bank_containing_regunit(reg)
|
||||
.ok_or_else(|| RegisterMappingError::MissingBank)?;
|
||||
match bank.name {
|
||||
"IntRegs" => {
|
||||
// x86 GP registers have a weird mapping to DWARF registers, so we use a
|
||||
// lookup table.
|
||||
Ok(X86_GP_REG_MAP[(reg - bank.first_unit) as usize])
|
||||
}
|
||||
"FloatRegs" => Ok(X86_XMM_REG_MAP[(reg - bank.first_unit) as usize]),
|
||||
_ => Err(RegisterMappingError::UnsupportedRegisterBank(bank.name)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn create_unwind_info(
|
||||
func: &Function,
|
||||
isa: &dyn TargetIsa,
|
||||
) -> CodegenResult<Option<UnwindInfo>> {
|
||||
// Only System V-like calling conventions are supported
|
||||
match isa.unwind_info_kind() {
|
||||
crate::machinst::UnwindInfoKind::SystemV => {}
|
||||
_ => return Ok(None),
|
||||
}
|
||||
|
||||
if func.prologue_end.is_none() || isa.name() != "x86" || isa.pointer_bits() != 64 {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let unwind = match super::create_unwind_info(func, isa)? {
|
||||
Some(u) => u,
|
||||
None => {
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
struct RegisterMapper<'a, 'b>(&'a (dyn TargetIsa + 'b));
|
||||
impl<'a, 'b> crate::isa::unwind::systemv::RegisterMapper<RegUnit> for RegisterMapper<'a, 'b> {
|
||||
fn map(&self, reg: RegUnit) -> Result<u16, RegisterMappingError> {
|
||||
Ok(map_reg(self.0, reg)?.0)
|
||||
}
|
||||
fn sp(&self) -> u16 {
|
||||
X86_64::RSP.0
|
||||
}
|
||||
fn fp(&self) -> Option<u16> {
|
||||
Some(X86_64::RBP.0)
|
||||
}
|
||||
}
|
||||
let map = RegisterMapper(isa);
|
||||
|
||||
Ok(Some(UnwindInfo::build(unwind, &map)?))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::ir::{
|
||||
types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
|
||||
};
|
||||
use crate::isa::{lookup_variant, BackendVariant, CallConv};
|
||||
use crate::settings::{builder, Flags};
|
||||
use crate::Context;
|
||||
use gimli::write::Address;
|
||||
use std::str::FromStr;
|
||||
use target_lexicon::triple;
|
||||
|
||||
#[test]
|
||||
fn test_simple_func() {
|
||||
let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_function(
|
||||
CallConv::SystemV,
|
||||
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
|
||||
));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
let fde = match isa
|
||||
.create_unwind_info(&context.func)
|
||||
.expect("can create unwind info")
|
||||
{
|
||||
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
|
||||
info.to_fde(Address::Constant(1234))
|
||||
}
|
||||
_ => panic!("expected unwind information"),
|
||||
};
|
||||
|
||||
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }");
|
||||
}
|
||||
|
||||
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
|
||||
let mut func =
|
||||
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
|
||||
|
||||
let block0 = func.dfg.make_block();
|
||||
let mut pos = FuncCursor::new(&mut func);
|
||||
pos.insert_block(block0);
|
||||
pos.ins().return_(&[]);
|
||||
|
||||
if let Some(stack_slot) = stack_slot {
|
||||
func.stack_slots.push(stack_slot);
|
||||
}
|
||||
|
||||
func
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multi_return_func() {
|
||||
let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
let fde = match isa
|
||||
.create_unwind_info(&context.func)
|
||||
.expect("can create unwind info")
|
||||
{
|
||||
Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
|
||||
info.to_fde(Address::Constant(4321))
|
||||
}
|
||||
_ => panic!("expected unwind information"),
|
||||
};
|
||||
|
||||
assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (12, RememberState), (12, SameValue(Register(6))), (12, Cfa(Register(7), 8)), (13, RestoreState), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }");
|
||||
}
|
||||
|
||||
fn create_multi_return_function(call_conv: CallConv) -> Function {
|
||||
let mut sig = Signature::new(call_conv);
|
||||
sig.params.push(AbiParam::new(types::I32));
|
||||
let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
|
||||
|
||||
let block0 = func.dfg.make_block();
|
||||
let v0 = func.dfg.append_block_param(block0, types::I32);
|
||||
let block1 = func.dfg.make_block();
|
||||
let block2 = func.dfg.make_block();
|
||||
|
||||
let mut pos = FuncCursor::new(&mut func);
|
||||
pos.insert_block(block0);
|
||||
pos.ins().brnz(v0, block2, &[]);
|
||||
pos.ins().jump(block1, &[]);
|
||||
|
||||
pos.insert_block(block1);
|
||||
pos.ins().return_(&[]);
|
||||
|
||||
pos.insert_block(block2);
|
||||
pos.ins().return_(&[]);
|
||||
|
||||
func
|
||||
}
|
||||
}
|
||||
@@ -1,265 +0,0 @@
|
||||
//! Unwind information for Windows x64 ABI.
|
||||
|
||||
use crate::ir::Function;
|
||||
use crate::isa::x86::registers::{FPR, GPR};
|
||||
use crate::isa::{unwind::winx64::UnwindInfo, RegUnit, TargetIsa};
|
||||
use crate::result::CodegenResult;
|
||||
|
||||
pub(crate) fn create_unwind_info(
|
||||
func: &Function,
|
||||
isa: &dyn TargetIsa,
|
||||
) -> CodegenResult<Option<UnwindInfo>> {
|
||||
// Only Windows fastcall is supported for unwind information
|
||||
if !func.signature.call_conv.extends_windows_fastcall() || func.prologue_end.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let unwind = match super::create_unwind_info(func, isa)? {
|
||||
Some(u) => u,
|
||||
None => {
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Some(UnwindInfo::build::<RegUnit, RegisterMapper>(unwind)?))
|
||||
}
|
||||
|
||||
struct RegisterMapper;
|
||||
|
||||
impl crate::isa::unwind::winx64::RegisterMapper<RegUnit> for RegisterMapper {
|
||||
fn map(reg: RegUnit) -> crate::isa::unwind::winx64::MappedRegister {
|
||||
use crate::isa::unwind::winx64::MappedRegister;
|
||||
if GPR.contains(reg) {
|
||||
MappedRegister::Int(GPR.index_of(reg) as u8)
|
||||
} else if FPR.contains(reg) {
|
||||
MappedRegister::Xmm(reg as u8)
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind};
|
||||
use crate::isa::unwind::winx64::UnwindCode;
|
||||
use crate::isa::x86::registers::RU;
|
||||
use crate::isa::{lookup_variant, BackendVariant, CallConv};
|
||||
use crate::settings::{builder, Flags};
|
||||
use crate::Context;
|
||||
use std::str::FromStr;
|
||||
use target_lexicon::triple;
|
||||
|
||||
#[test]
|
||||
fn test_wrong_calling_convention() {
|
||||
let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_function(CallConv::SystemV, None));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
assert_eq!(
|
||||
create_unwind_info(&context.func, &*isa).expect("can create unwind info"),
|
||||
None
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_small_alloc() {
|
||||
let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_function(
|
||||
CallConv::WindowsFastcall,
|
||||
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
|
||||
));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
let unwind = create_unwind_info(&context.func, &*isa)
|
||||
.expect("can create unwind info")
|
||||
.expect("expected unwind info");
|
||||
|
||||
assert_eq!(
|
||||
unwind,
|
||||
UnwindInfo {
|
||||
flags: 0,
|
||||
prologue_size: 9,
|
||||
frame_register: None,
|
||||
frame_register_offset: 0,
|
||||
unwind_codes: vec![
|
||||
UnwindCode::PushRegister {
|
||||
instruction_offset: 2,
|
||||
reg: GPR.index_of(RU::rbp.into()) as u8
|
||||
},
|
||||
UnwindCode::StackAlloc {
|
||||
instruction_offset: 9,
|
||||
size: 64
|
||||
}
|
||||
]
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(unwind.emit_size(), 8);
|
||||
|
||||
let mut buf = [0u8; 8];
|
||||
unwind.emit(&mut buf);
|
||||
|
||||
assert_eq!(
|
||||
buf,
|
||||
[
|
||||
0x01, // Version and flags (version 1, no flags)
|
||||
0x09, // Prologue size
|
||||
0x02, // Unwind code count (1 for stack alloc, 1 for push reg)
|
||||
0x00, // Frame register + offset (no frame register)
|
||||
0x09, // Prolog offset
|
||||
0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes)
|
||||
0x02, // Prolog offset
|
||||
0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP)
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_medium_alloc() {
|
||||
let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_function(
|
||||
CallConv::WindowsFastcall,
|
||||
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)),
|
||||
));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
let unwind = create_unwind_info(&context.func, &*isa)
|
||||
.expect("can create unwind info")
|
||||
.expect("expected unwind info");
|
||||
|
||||
assert_eq!(
|
||||
unwind,
|
||||
UnwindInfo {
|
||||
flags: 0,
|
||||
prologue_size: 27,
|
||||
frame_register: None,
|
||||
frame_register_offset: 0,
|
||||
unwind_codes: vec![
|
||||
UnwindCode::PushRegister {
|
||||
instruction_offset: 2,
|
||||
reg: GPR.index_of(RU::rbp.into()) as u8
|
||||
},
|
||||
UnwindCode::StackAlloc {
|
||||
instruction_offset: 27,
|
||||
size: 10000
|
||||
}
|
||||
]
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(unwind.emit_size(), 12);
|
||||
|
||||
let mut buf = [0u8; 12];
|
||||
unwind.emit(&mut buf);
|
||||
|
||||
assert_eq!(
|
||||
buf,
|
||||
[
|
||||
0x01, // Version and flags (version 1, no flags)
|
||||
0x1B, // Prologue size
|
||||
0x03, // Unwind code count (2 for stack alloc, 1 for push reg)
|
||||
0x00, // Frame register + offset (no frame register)
|
||||
0x1B, // Prolog offset
|
||||
0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
|
||||
0xE2, // Low size byte
|
||||
0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes)
|
||||
0x02, // Prolog offset
|
||||
0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
|
||||
0x00, // Padding
|
||||
0x00, // Padding
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_large_alloc() {
|
||||
let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
|
||||
.expect("expect x86 ISA")
|
||||
.finish(Flags::new(builder()));
|
||||
|
||||
let mut context = Context::for_function(create_function(
|
||||
CallConv::WindowsFastcall,
|
||||
Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)),
|
||||
));
|
||||
|
||||
context.compile(&*isa).expect("expected compilation");
|
||||
|
||||
let unwind = create_unwind_info(&context.func, &*isa)
|
||||
.expect("can create unwind info")
|
||||
.expect("expected unwind info");
|
||||
|
||||
assert_eq!(
|
||||
unwind,
|
||||
UnwindInfo {
|
||||
flags: 0,
|
||||
prologue_size: 27,
|
||||
frame_register: None,
|
||||
frame_register_offset: 0,
|
||||
unwind_codes: vec![
|
||||
UnwindCode::PushRegister {
|
||||
instruction_offset: 2,
|
||||
reg: GPR.index_of(RU::rbp.into()) as u8
|
||||
},
|
||||
UnwindCode::StackAlloc {
|
||||
instruction_offset: 27,
|
||||
size: 1000000
|
||||
}
|
||||
]
|
||||
}
|
||||
);
|
||||
|
||||
assert_eq!(unwind.emit_size(), 12);
|
||||
|
||||
let mut buf = [0u8; 12];
|
||||
unwind.emit(&mut buf);
|
||||
|
||||
assert_eq!(
|
||||
buf,
|
||||
[
|
||||
0x01, // Version and flags (version 1, no flags)
|
||||
0x1B, // Prologue size
|
||||
0x04, // Unwind code count (3 for stack alloc, 1 for push reg)
|
||||
0x00, // Frame register + offset (no frame register)
|
||||
0x1B, // Prolog offset
|
||||
0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
|
||||
0x40, // Byte 1 of size
|
||||
0x42, // Byte 2 of size
|
||||
0x0F, // Byte 3 of size
|
||||
0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes)
|
||||
0x02, // Prolog offset
|
||||
0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
|
||||
let mut func =
|
||||
Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
|
||||
|
||||
let block0 = func.dfg.make_block();
|
||||
let mut pos = FuncCursor::new(&mut func);
|
||||
pos.insert_block(block0);
|
||||
pos.ins().return_(&[]);
|
||||
|
||||
if let Some(stack_slot) = stack_slot {
|
||||
func.stack_slots.push(stack_slot);
|
||||
}
|
||||
|
||||
func
|
||||
}
|
||||
}
|
||||
@@ -84,12 +84,9 @@ pub(crate) mod aarch64;
|
||||
#[cfg(feature = "s390x")]
|
||||
mod s390x;
|
||||
|
||||
#[cfg(any(feature = "x86", feature = "riscv"))]
|
||||
#[cfg(feature = "riscv")]
|
||||
mod legacy;
|
||||
|
||||
#[cfg(feature = "x86")]
|
||||
use legacy::x86;
|
||||
|
||||
#[cfg(feature = "riscv")]
|
||||
use legacy::riscv;
|
||||
|
||||
@@ -120,49 +117,19 @@ macro_rules! isa_builder {
|
||||
}};
|
||||
}
|
||||
|
||||
/// The "variant" for a given target. On one platform (x86-64), we have two
|
||||
/// backends, the "old" and "new" one; the new one is the default if included
|
||||
/// in the build configuration and not otherwise specified.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum BackendVariant {
|
||||
/// Any backend available.
|
||||
Any,
|
||||
/// A "legacy" backend: one that operates using legalizations and encodings.
|
||||
Legacy,
|
||||
/// A backend built on `MachInst`s and the `VCode` framework.
|
||||
MachInst,
|
||||
}
|
||||
|
||||
impl Default for BackendVariant {
|
||||
fn default() -> Self {
|
||||
BackendVariant::Any
|
||||
}
|
||||
}
|
||||
|
||||
/// Look for an ISA for the given `triple`, selecting the backend variant given
|
||||
/// by `variant` if available.
|
||||
pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder, LookupError> {
|
||||
match (triple.architecture, variant) {
|
||||
(Architecture::Riscv32 { .. }, _) | (Architecture::Riscv64 { .. }, _) => {
|
||||
pub fn lookup_variant(triple: Triple) -> Result<Builder, LookupError> {
|
||||
match triple.architecture {
|
||||
Architecture::Riscv32 { .. } | Architecture::Riscv64 { .. } => {
|
||||
isa_builder!(riscv, (feature = "riscv"), triple)
|
||||
}
|
||||
(Architecture::X86_64, BackendVariant::Legacy) => {
|
||||
isa_builder!(x86, (feature = "x86"), triple)
|
||||
}
|
||||
(Architecture::X86_64, BackendVariant::MachInst) => {
|
||||
Architecture::X86_64 => {
|
||||
isa_builder!(x64, (feature = "x86"), triple)
|
||||
}
|
||||
#[cfg(not(feature = "old-x86-backend"))]
|
||||
(Architecture::X86_64, BackendVariant::Any) => {
|
||||
isa_builder!(x64, (feature = "x86"), triple)
|
||||
}
|
||||
#[cfg(feature = "old-x86-backend")]
|
||||
(Architecture::X86_64, BackendVariant::Any) => {
|
||||
isa_builder!(x86, (feature = "x86"), triple)
|
||||
}
|
||||
(Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple),
|
||||
(Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple),
|
||||
(Architecture::S390x { .. }, _) => isa_builder!(s390x, (feature = "s390x"), triple),
|
||||
Architecture::Arm { .. } => isa_builder!(arm32, (feature = "arm32"), triple),
|
||||
Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple),
|
||||
Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple),
|
||||
_ => Err(LookupError::Unsupported),
|
||||
}
|
||||
}
|
||||
@@ -170,7 +137,7 @@ pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder
|
||||
/// Look for an ISA for the given `triple`.
|
||||
/// Return a builder that can create a corresponding `TargetIsa`.
|
||||
pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
|
||||
lookup_variant(triple, BackendVariant::Any)
|
||||
lookup_variant(triple)
|
||||
}
|
||||
|
||||
/// Look for a supported ISA with the given `name`.
|
||||
@@ -292,11 +259,6 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
|
||||
/// Get the ISA-dependent flag values that were used to make this trait object.
|
||||
fn isa_flags(&self) -> Vec<settings::Value>;
|
||||
|
||||
/// Get the variant of this ISA (Legacy or MachInst).
|
||||
fn variant(&self) -> BackendVariant {
|
||||
BackendVariant::Legacy
|
||||
}
|
||||
|
||||
/// Hashes all flags, both ISA-independent and ISA-specific, into the
|
||||
/// specified hasher.
|
||||
fn hash_all_flags(&self, hasher: &mut dyn Hasher);
|
||||
|
||||
@@ -2962,45 +2962,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::IfcmpImm => {
|
||||
panic!("ALU+imm and ALU+carry ops should not appear here!");
|
||||
}
|
||||
|
||||
#[cfg(feature = "x86")]
|
||||
Opcode::X86Udivmodx
|
||||
| Opcode::X86Sdivmodx
|
||||
| Opcode::X86Umulx
|
||||
| Opcode::X86Smulx
|
||||
| Opcode::X86Cvtt2si
|
||||
| Opcode::X86Fmin
|
||||
| Opcode::X86Fmax
|
||||
| Opcode::X86Push
|
||||
| Opcode::X86Pop
|
||||
| Opcode::X86Bsr
|
||||
| Opcode::X86Bsf
|
||||
| Opcode::X86Pblendw
|
||||
| Opcode::X86Pshufd
|
||||
| Opcode::X86Pshufb
|
||||
| Opcode::X86Pextr
|
||||
| Opcode::X86Pinsr
|
||||
| Opcode::X86Insertps
|
||||
| Opcode::X86Movsd
|
||||
| Opcode::X86Movlhps
|
||||
| Opcode::X86Psll
|
||||
| Opcode::X86Psrl
|
||||
| Opcode::X86Psra
|
||||
| Opcode::X86Ptest
|
||||
| Opcode::X86Pmaxs
|
||||
| Opcode::X86Pmaxu
|
||||
| Opcode::X86Pmins
|
||||
| Opcode::X86Pminu
|
||||
| Opcode::X86Pmullq
|
||||
| Opcode::X86Pmuludq
|
||||
| Opcode::X86Punpckh
|
||||
| Opcode::X86Punpckl
|
||||
| Opcode::X86Vcvtudq2ps
|
||||
| Opcode::X86Palignr
|
||||
| Opcode::X86ElfTlsGetAddr
|
||||
| Opcode::X86MachoTlsGetAddr => {
|
||||
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -109,7 +109,6 @@ mod tests {
|
||||
use target_lexicon::triple;
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(feature = "old-x86-backend", ignore)]
|
||||
fn test_simple_func() {
|
||||
let isa = lookup(triple!("x86_64"))
|
||||
.expect("expect x86 ISA")
|
||||
@@ -152,7 +151,6 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(feature = "old-x86-backend", ignore)]
|
||||
fn test_multi_return_func() {
|
||||
let isa = lookup(triple!("x86_64"))
|
||||
.expect("expect x86 ISA")
|
||||
|
||||
@@ -6900,44 +6900,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
panic!("Branch opcode reached non-branch lowering logic!");
|
||||
}
|
||||
|
||||
Opcode::X86Udivmodx
|
||||
| Opcode::X86Sdivmodx
|
||||
| Opcode::X86Umulx
|
||||
| Opcode::X86Smulx
|
||||
| Opcode::X86Cvtt2si
|
||||
| Opcode::X86Fmin
|
||||
| Opcode::X86Fmax
|
||||
| Opcode::X86Push
|
||||
| Opcode::X86Pop
|
||||
| Opcode::X86Bsr
|
||||
| Opcode::X86Bsf
|
||||
| Opcode::X86Pblendw
|
||||
| Opcode::X86Pshufd
|
||||
| Opcode::X86Pshufb
|
||||
| Opcode::X86Pextr
|
||||
| Opcode::X86Pinsr
|
||||
| Opcode::X86Insertps
|
||||
| Opcode::X86Movsd
|
||||
| Opcode::X86Movlhps
|
||||
| Opcode::X86Palignr
|
||||
| Opcode::X86Psll
|
||||
| Opcode::X86Psrl
|
||||
| Opcode::X86Psra
|
||||
| Opcode::X86Ptest
|
||||
| Opcode::X86Pmaxs
|
||||
| Opcode::X86Pmaxu
|
||||
| Opcode::X86Pmins
|
||||
| Opcode::X86Pminu
|
||||
| Opcode::X86Pmullq
|
||||
| Opcode::X86Pmuludq
|
||||
| Opcode::X86Punpckh
|
||||
| Opcode::X86Punpckl
|
||||
| Opcode::X86Vcvtudq2ps
|
||||
| Opcode::X86ElfTlsGetAddr
|
||||
| Opcode::X86MachoTlsGetAddr => {
|
||||
panic!("x86-specific opcode in supposedly arch-neutral IR!");
|
||||
}
|
||||
|
||||
Opcode::Nop => {
|
||||
// Nothing.
|
||||
}
|
||||
|
||||
@@ -21,9 +21,9 @@ use crate::ir::types::{I32, I64};
|
||||
use crate::ir::{self, InstBuilder, MemFlags};
|
||||
use crate::isa::TargetIsa;
|
||||
|
||||
#[cfg(any(feature = "x86", feature = "riscv"))]
|
||||
#[cfg(feature = "riscv")]
|
||||
use crate::predicates;
|
||||
#[cfg(any(feature = "x86", feature = "riscv"))]
|
||||
#[cfg(feature = "riscv")]
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use crate::timing;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use crate::binemit;
|
||||
use crate::ir;
|
||||
use crate::isa::{
|
||||
BackendVariant, EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa,
|
||||
EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa,
|
||||
};
|
||||
use crate::machinst::*;
|
||||
use crate::regalloc::RegisterSet;
|
||||
@@ -64,10 +64,6 @@ impl TargetIsa for TargetIsaAdapter {
|
||||
self.backend.isa_flags()
|
||||
}
|
||||
|
||||
fn variant(&self) -> BackendVariant {
|
||||
BackendVariant::MachInst
|
||||
}
|
||||
|
||||
fn hash_all_flags(&self, hasher: &mut dyn Hasher) {
|
||||
self.backend.hash_all_flags(hasher);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user