Remove the old x86 backend
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1,723 +0,0 @@
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use crate::cdsl::instructions::{
|
||||
AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder,
|
||||
};
|
||||
use crate::cdsl::operands::Operand;
|
||||
use crate::cdsl::types::ValueType;
|
||||
use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar};
|
||||
use crate::shared::entities::EntityRefs;
|
||||
use crate::shared::formats::Formats;
|
||||
use crate::shared::immediates::Immediates;
|
||||
use crate::shared::types;
|
||||
|
||||
#[allow(clippy::many_single_char_names)]
|
||||
pub(crate) fn define(
|
||||
mut all_instructions: &mut AllInstructions,
|
||||
formats: &Formats,
|
||||
immediates: &Immediates,
|
||||
entities: &EntityRefs,
|
||||
) -> InstructionGroup {
|
||||
let mut ig = InstructionGroupBuilder::new(&mut all_instructions);
|
||||
|
||||
let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into();
|
||||
|
||||
let iWord = &TypeVar::new(
|
||||
"iWord",
|
||||
"A scalar integer machine word",
|
||||
TypeSetBuilder::new().ints(32..64).build(),
|
||||
);
|
||||
let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator");
|
||||
let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator");
|
||||
let d = &Operand::new("d", iWord).with_doc("Denominator");
|
||||
let q = &Operand::new("q", iWord).with_doc("Quotient");
|
||||
let r = &Operand::new("r", iWord).with_doc("Remainder");
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_udivmodx",
|
||||
r#"
|
||||
Extended unsigned division.
|
||||
|
||||
Concatenate the bits in `nhi` and `nlo` to form the numerator.
|
||||
Interpret the bits as an unsigned number and divide by the unsigned
|
||||
denominator `d`. Trap when `d` is zero or if the quotient is larger
|
||||
than the range of the output.
|
||||
|
||||
Return both quotient and remainder.
|
||||
"#,
|
||||
&formats.ternary,
|
||||
)
|
||||
.operands_in(vec![nlo, nhi, d])
|
||||
.operands_out(vec![q, r])
|
||||
.can_trap(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_sdivmodx",
|
||||
r#"
|
||||
Extended signed division.
|
||||
|
||||
Concatenate the bits in `nhi` and `nlo` to form the numerator.
|
||||
Interpret the bits as a signed number and divide by the signed
|
||||
denominator `d`. Trap when `d` is zero or if the quotient is outside
|
||||
the range of the output.
|
||||
|
||||
Return both quotient and remainder.
|
||||
"#,
|
||||
&formats.ternary,
|
||||
)
|
||||
.operands_in(vec![nlo, nhi, d])
|
||||
.operands_out(vec![q, r])
|
||||
.can_trap(true),
|
||||
);
|
||||
|
||||
let argL = &Operand::new("argL", iWord);
|
||||
let argR = &Operand::new("argR", iWord);
|
||||
let resLo = &Operand::new("resLo", iWord);
|
||||
let resHi = &Operand::new("resHi", iWord);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_umulx",
|
||||
r#"
|
||||
Unsigned integer multiplication, producing a double-length result.
|
||||
|
||||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![argL, argR])
|
||||
.operands_out(vec![resLo, resHi]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_smulx",
|
||||
r#"
|
||||
Signed integer multiplication, producing a double-length result.
|
||||
|
||||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![argL, argR])
|
||||
.operands_out(vec![resLo, resHi]),
|
||||
);
|
||||
|
||||
let Float = &TypeVar::new(
|
||||
"Float",
|
||||
"A scalar or vector floating point number",
|
||||
TypeSetBuilder::new()
|
||||
.floats(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.build(),
|
||||
);
|
||||
let IntTo = &TypeVar::new(
|
||||
"IntTo",
|
||||
"An integer type with the same number of lanes",
|
||||
TypeSetBuilder::new()
|
||||
.ints(32..64)
|
||||
.simd_lanes(Interval::All)
|
||||
.build(),
|
||||
);
|
||||
let x = &Operand::new("x", Float);
|
||||
let a = &Operand::new("a", IntTo);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_cvtt2si",
|
||||
r#"
|
||||
Convert with truncation floating point to signed integer.
|
||||
|
||||
The source floating point operand is converted to a signed integer by
|
||||
rounding towards zero. If the result can't be represented in the output
|
||||
type, returns the smallest signed value the output type can represent.
|
||||
|
||||
This instruction does not trap.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let f32x4 = &TypeVar::new(
|
||||
"f32x4",
|
||||
"A floating point number",
|
||||
TypeSetBuilder::new()
|
||||
.floats(32..32)
|
||||
.simd_lanes(4..4)
|
||||
.build(),
|
||||
);
|
||||
let i32x4 = &TypeVar::new(
|
||||
"i32x4",
|
||||
"An integer type with the same number of lanes",
|
||||
TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(),
|
||||
);
|
||||
let x = &Operand::new("x", i32x4);
|
||||
let a = &Operand::new("a", f32x4);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_vcvtudq2ps",
|
||||
r#"
|
||||
Convert unsigned integer to floating point.
|
||||
|
||||
Convert packed doubleword unsigned integers to packed single-precision floating-point
|
||||
values. This instruction does not trap.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", Float);
|
||||
let a = &Operand::new("a", Float);
|
||||
let y = &Operand::new("y", Float);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_fmin",
|
||||
r#"
|
||||
Floating point minimum with x86 semantics.
|
||||
|
||||
This is equivalent to the C ternary operator `x < y ? x : y` which
|
||||
differs from `fmin` when either operand is NaN or when comparing
|
||||
+0.0 to -0.0.
|
||||
|
||||
When the two operands don't compare as LT, `y` is returned unchanged,
|
||||
even if it is a signalling NaN.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_fmax",
|
||||
r#"
|
||||
Floating point maximum with x86 semantics.
|
||||
|
||||
This is equivalent to the C ternary operator `x > y ? x : y` which
|
||||
differs from `fmax` when either operand is NaN or when comparing
|
||||
+0.0 to -0.0.
|
||||
|
||||
When the two operands don't compare as GT, `y` is returned unchanged,
|
||||
even if it is a signalling NaN.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", iWord);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_push",
|
||||
r#"
|
||||
Pushes a value onto the stack.
|
||||
|
||||
Decrements the stack pointer and stores the specified value on to the top.
|
||||
|
||||
This is polymorphic in i32 and i64. However, it is only implemented for i64
|
||||
in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.other_side_effects(true)
|
||||
.can_store(true),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pop",
|
||||
r#"
|
||||
Pops a value from the stack.
|
||||
|
||||
Loads a value from the top of the stack and then increments the stack
|
||||
pointer.
|
||||
|
||||
This is polymorphic in i32 and i64. However, it is only implemented for i64
|
||||
in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
"#,
|
||||
&formats.nullary,
|
||||
)
|
||||
.operands_out(vec![x])
|
||||
.other_side_effects(true)
|
||||
.can_load(true),
|
||||
);
|
||||
|
||||
let y = &Operand::new("y", iWord);
|
||||
let rflags = &Operand::new("rflags", iflags);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_bsr",
|
||||
r#"
|
||||
Bit Scan Reverse -- returns the bit-index of the most significant 1
|
||||
in the word. Result is undefined if the argument is zero. However, it
|
||||
sets the Z flag depending on the argument, so it is at least easy to
|
||||
detect and handle that case.
|
||||
|
||||
This is polymorphic in i32 and i64. It is implemented for both i64 and
|
||||
i32 in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![y, rflags]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_bsf",
|
||||
r#"
|
||||
Bit Scan Forwards -- returns the bit-index of the least significant 1
|
||||
in the word. Is otherwise identical to 'bsr', just above.
|
||||
"#,
|
||||
&formats.unary,
|
||||
)
|
||||
.operands_in(vec![x])
|
||||
.operands_out(vec![y, rflags]),
|
||||
);
|
||||
|
||||
let uimm8 = &immediates.uimm8;
|
||||
let TxN = &TypeVar::new(
|
||||
"TxN",
|
||||
"A SIMD vector type",
|
||||
TypeSetBuilder::new()
|
||||
.ints(Interval::All)
|
||||
.floats(Interval::All)
|
||||
.bools(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)");
|
||||
let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)");
|
||||
let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details");
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pshufd",
|
||||
r#"
|
||||
Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
|
||||
register and re-orders the data according to the passed immediate byte.
|
||||
"#,
|
||||
&formats.binary_imm8,
|
||||
)
|
||||
.operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pshufb",
|
||||
r#"
|
||||
Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle
|
||||
mask from either memory or another extended register
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN)
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b");
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pblendw",
|
||||
r#"
|
||||
Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a
|
||||
lane in ``b``: if the bit is set, the lane is copied into ``a``.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![a, b, mask])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index");
|
||||
let x = &Operand::new("x", TxN);
|
||||
let a = &Operand::new("a", &TxN.lane_of());
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pextr",
|
||||
r#"
|
||||
Extract lane ``Idx`` from ``x``.
|
||||
The lane index, ``Idx``, is an immediate value, not an SSA value. It
|
||||
must indicate a valid lane index for the type of ``x``.
|
||||
"#,
|
||||
&formats.binary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, Idx])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let IBxN = &TypeVar::new(
|
||||
"IBxN",
|
||||
"A SIMD vector type containing only booleans and integers",
|
||||
TypeSetBuilder::new()
|
||||
.ints(Interval::All)
|
||||
.bools(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let x = &Operand::new("x", IBxN);
|
||||
let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value");
|
||||
let a = &Operand::new("a", IBxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pinsr",
|
||||
r#"
|
||||
Insert ``y`` into ``x`` at lane ``Idx``.
|
||||
The lane index, ``Idx``, is an immediate value, not an SSA value. It
|
||||
must indicate a valid lane index for the type of ``x``.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, y, Idx])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let FxN = &TypeVar::new(
|
||||
"FxN",
|
||||
"A SIMD vector type containing floats",
|
||||
TypeSetBuilder::new()
|
||||
.floats(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let x = &Operand::new("x", FxN);
|
||||
let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value");
|
||||
let a = &Operand::new("a", FxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_insertps",
|
||||
r#"
|
||||
Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is
|
||||
extracted from and which it is inserted to. This is similar to x86_pinsr but inserts
|
||||
floats, which are already stored in an XMM register.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, y, Idx])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", TxN);
|
||||
let y = &Operand::new("y", TxN);
|
||||
let a = &Operand::new("a", TxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_punpckh",
|
||||
r#"
|
||||
Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
|
||||
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
|
||||
would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane
|
||||
ordering).
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_punpckl",
|
||||
r#"
|
||||
Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
|
||||
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
|
||||
would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane
|
||||
ordering).
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", FxN);
|
||||
let y = &Operand::new("y", FxN);
|
||||
let a = &Operand::new("a", FxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_movsd",
|
||||
r#"
|
||||
Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x``
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_movlhps",
|
||||
r#"
|
||||
Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x``
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let IxN = &TypeVar::new(
|
||||
"IxN",
|
||||
"A SIMD vector type containing integers",
|
||||
TypeSetBuilder::new()
|
||||
.ints(Interval::All)
|
||||
.simd_lanes(Interval::All)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let I128 = &TypeVar::new(
|
||||
"I128",
|
||||
"A SIMD vector type containing one large integer (due to Cranelift type constraints, \
|
||||
this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
|
||||
upper lane is concatenated with the lower lane to form the integer)",
|
||||
TypeSetBuilder::new()
|
||||
.ints(64..64)
|
||||
.simd_lanes(2..2)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
|
||||
let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
|
||||
let a = &Operand::new("a", IxN);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_psll",
|
||||
r#"
|
||||
Shift Packed Data Left Logical -- This implements the behavior of the shared instruction
|
||||
``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL*
|
||||
family of instructions.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_psrl",
|
||||
r#"
|
||||
Shift Packed Data Right Logical -- This implements the behavior of the shared instruction
|
||||
``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL*
|
||||
family of instructions.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_psra",
|
||||
r#"
|
||||
Shift Packed Data Right Arithmetic -- This implements the behavior of the shared
|
||||
instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by
|
||||
the PSRA* family of instructions.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let I64x2 = &TypeVar::new(
|
||||
"I64x2",
|
||||
"A SIMD vector type containing two 64-bit integers",
|
||||
TypeSetBuilder::new()
|
||||
.ints(64..64)
|
||||
.simd_lanes(2..2)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", I64x2);
|
||||
let y = &Operand::new("y", I64x2);
|
||||
let a = &Operand::new("a", I64x2);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmullq",
|
||||
r#"
|
||||
Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
|
||||
lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
|
||||
encodings for CPUs with newer vector features.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmuludq",
|
||||
r#"
|
||||
Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
|
||||
unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
|
||||
overflow as in `x86_pmullq`.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", TxN);
|
||||
let y = &Operand::new("y", TxN);
|
||||
let f = &Operand::new("f", iflags);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_ptest",
|
||||
r#"
|
||||
Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the
|
||||
bitwise AND of the first source operand (first operand) and the second source operand
|
||||
(second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise
|
||||
AND of the second source operand (second operand) and the logical NOT of the destination
|
||||
operand (first operand).
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![f]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", IxN);
|
||||
let y = &Operand::new("y", IxN);
|
||||
let a = &Operand::new("a", IxN);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmaxs",
|
||||
r#"
|
||||
Maximum of Packed Signed Integers -- Compare signed integers in the first and second
|
||||
operand and return the maximum values.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmaxu",
|
||||
r#"
|
||||
Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
|
||||
operand and return the maximum values.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmins",
|
||||
r#"
|
||||
Minimum of Packed Signed Integers -- Compare signed integers in the first and second
|
||||
operand and return the minimum values.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pminu",
|
||||
r#"
|
||||
Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
|
||||
operand and return the minimum values.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let c = &Operand::new("c", uimm8)
|
||||
.with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_palignr",
|
||||
r#"
|
||||
Concatenate destination and source operands, extracting a byte-aligned result shifted to
|
||||
the right by `c`.
|
||||
"#,
|
||||
&formats.ternary_imm8,
|
||||
)
|
||||
.operands_in(vec![x, y, c])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let i64_t = &TypeVar::new(
|
||||
"i64_t",
|
||||
"A scalar 64bit integer",
|
||||
TypeSetBuilder::new().ints(64..64).build(),
|
||||
);
|
||||
|
||||
let GV = &Operand::new("GV", &entities.global_value);
|
||||
let addr = &Operand::new("addr", i64_t);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_elf_tls_get_addr",
|
||||
r#"
|
||||
Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should
|
||||
not be used.
|
||||
"#,
|
||||
&formats.unary_global_value,
|
||||
)
|
||||
// This is a bit overly broad to mark as clobbering *all* the registers, because it should
|
||||
// only preserve caller-saved registers. There's no way to indicate this to register
|
||||
// allocation yet, though, so mark as clobbering all registers instead.
|
||||
.clobbers_all_regs(true)
|
||||
.operands_in(vec![GV])
|
||||
.operands_out(vec![addr]),
|
||||
);
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_macho_tls_get_addr",
|
||||
r#"
|
||||
Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should
|
||||
not be used.
|
||||
"#,
|
||||
&formats.unary_global_value,
|
||||
)
|
||||
// See above comment for x86_elf_tls_get_addr.
|
||||
.clobbers_all_regs(true)
|
||||
.operands_in(vec![GV])
|
||||
.operands_out(vec![addr]),
|
||||
);
|
||||
|
||||
ig.build()
|
||||
}
|
||||
@@ -1,827 +0,0 @@
|
||||
use crate::cdsl::ast::{constant, var, ExprBuilder, Literal};
|
||||
use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
|
||||
use crate::cdsl::types::{LaneType, ValueType};
|
||||
use crate::cdsl::xform::TransformGroupBuilder;
|
||||
use crate::shared::types::Float::{F32, F64};
|
||||
use crate::shared::types::Int::{I16, I32, I64, I8};
|
||||
use crate::shared::Definitions as SharedDefinitions;
|
||||
|
||||
#[allow(clippy::many_single_char_names)]
|
||||
pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
|
||||
let mut expand = TransformGroupBuilder::new(
|
||||
"x86_expand",
|
||||
r#"
|
||||
Legalize instructions by expansion.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("expand_flags").id);
|
||||
|
||||
let mut narrow = TransformGroupBuilder::new(
|
||||
"x86_narrow",
|
||||
r#"
|
||||
Legalize instructions by narrowing.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("narrow_flags").id);
|
||||
|
||||
let mut narrow_avx = TransformGroupBuilder::new(
|
||||
"x86_narrow_avx",
|
||||
r#"
|
||||
Legalize instructions by narrowing with CPU feature checks.
|
||||
|
||||
This special case converts using x86 AVX instructions where available."#,
|
||||
)
|
||||
.isa("x86");
|
||||
// We cannot chain with the x86_narrow group until this group is built, see bottom of this
|
||||
// function for where this is chained.
|
||||
|
||||
let mut widen = TransformGroupBuilder::new(
|
||||
"x86_widen",
|
||||
r#"
|
||||
Legalize instructions by widening.
|
||||
|
||||
Use x86-specific instructions if needed."#,
|
||||
)
|
||||
.isa("x86")
|
||||
.chain_with(shared.transform_groups.by_name("widen").id);
|
||||
|
||||
// List of instructions.
|
||||
let insts = &shared.instructions;
|
||||
let band = insts.by_name("band");
|
||||
let bor = insts.by_name("bor");
|
||||
let clz = insts.by_name("clz");
|
||||
let ctz = insts.by_name("ctz");
|
||||
let fcmp = insts.by_name("fcmp");
|
||||
let fcvt_from_uint = insts.by_name("fcvt_from_uint");
|
||||
let fcvt_to_sint = insts.by_name("fcvt_to_sint");
|
||||
let fcvt_to_uint = insts.by_name("fcvt_to_uint");
|
||||
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
|
||||
let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
|
||||
let fmax = insts.by_name("fmax");
|
||||
let fmin = insts.by_name("fmin");
|
||||
let iadd = insts.by_name("iadd");
|
||||
let iconst = insts.by_name("iconst");
|
||||
let imul = insts.by_name("imul");
|
||||
let ineg = insts.by_name("ineg");
|
||||
let isub = insts.by_name("isub");
|
||||
let ishl = insts.by_name("ishl");
|
||||
let ireduce = insts.by_name("ireduce");
|
||||
let popcnt = insts.by_name("popcnt");
|
||||
let sdiv = insts.by_name("sdiv");
|
||||
let selectif = insts.by_name("selectif");
|
||||
let smulhi = insts.by_name("smulhi");
|
||||
let srem = insts.by_name("srem");
|
||||
let tls_value = insts.by_name("tls_value");
|
||||
let udiv = insts.by_name("udiv");
|
||||
let umulhi = insts.by_name("umulhi");
|
||||
let ushr = insts.by_name("ushr");
|
||||
let ushr_imm = insts.by_name("ushr_imm");
|
||||
let urem = insts.by_name("urem");
|
||||
|
||||
let x86_bsf = x86_instructions.by_name("x86_bsf");
|
||||
let x86_bsr = x86_instructions.by_name("x86_bsr");
|
||||
let x86_umulx = x86_instructions.by_name("x86_umulx");
|
||||
let x86_smulx = x86_instructions.by_name("x86_smulx");
|
||||
|
||||
let imm = &shared.imm;
|
||||
|
||||
// Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce
|
||||
// the size of the shift amount. This is useful for x86_32, where an I64 shift amount is
|
||||
// not encodable.
|
||||
let a = var("a");
|
||||
let x = var("x");
|
||||
let y = var("y");
|
||||
let z = var("z");
|
||||
|
||||
for &ty in &[I8, I16, I32] {
|
||||
let ishl_by_i64 = ishl.bind(ty).bind(I64);
|
||||
let ireduce = ireduce.bind(I32);
|
||||
expand.legalize(
|
||||
def!(a = ishl_by_i64(x, y)),
|
||||
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
|
||||
);
|
||||
}
|
||||
|
||||
for &ty in &[I8, I16, I32] {
|
||||
let ushr_by_i64 = ushr.bind(ty).bind(I64);
|
||||
let ireduce = ireduce.bind(I32);
|
||||
expand.legalize(
|
||||
def!(a = ushr_by_i64(x, y)),
|
||||
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
|
||||
);
|
||||
}
|
||||
|
||||
// Division and remainder.
|
||||
//
|
||||
// The srem expansion requires custom code because srem INT_MIN, -1 is not
|
||||
// allowed to trap. The other ops need to check avoid_div_traps.
|
||||
expand.custom_legalize(sdiv, "expand_sdivrem");
|
||||
expand.custom_legalize(srem, "expand_sdivrem");
|
||||
expand.custom_legalize(udiv, "expand_udivrem");
|
||||
expand.custom_legalize(urem, "expand_udivrem");
|
||||
|
||||
// Double length (widening) multiplication.
|
||||
let a = var("a");
|
||||
let x = var("x");
|
||||
let y = var("y");
|
||||
let a1 = var("a1");
|
||||
let a2 = var("a2");
|
||||
let res_lo = var("res_lo");
|
||||
let res_hi = var("res_hi");
|
||||
|
||||
expand.legalize(
|
||||
def!(res_hi = umulhi(x, y)),
|
||||
vec![def!((res_lo, res_hi) = x86_umulx(x, y))],
|
||||
);
|
||||
|
||||
expand.legalize(
|
||||
def!(res_hi = smulhi(x, y)),
|
||||
vec![def!((res_lo, res_hi) = x86_smulx(x, y))],
|
||||
);
|
||||
|
||||
// Floating point condition codes.
|
||||
//
|
||||
// The 8 condition codes in `supported_floatccs` are directly supported by a
|
||||
// `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
|
||||
// patterns.
|
||||
|
||||
let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq");
|
||||
let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord");
|
||||
let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq");
|
||||
let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne");
|
||||
let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno");
|
||||
let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one");
|
||||
|
||||
// Equality needs an explicit `ord` test which checks the parity bit.
|
||||
expand.legalize(
|
||||
def!(a = fcmp(floatcc_eq, x, y)),
|
||||
vec![
|
||||
def!(a1 = fcmp(floatcc_ord, x, y)),
|
||||
def!(a2 = fcmp(floatcc_ueq, x, y)),
|
||||
def!(a = band(a1, a2)),
|
||||
],
|
||||
);
|
||||
expand.legalize(
|
||||
def!(a = fcmp(floatcc_ne, x, y)),
|
||||
vec![
|
||||
def!(a1 = fcmp(floatcc_uno, x, y)),
|
||||
def!(a2 = fcmp(floatcc_one, x, y)),
|
||||
def!(a = bor(a1, a2)),
|
||||
],
|
||||
);
|
||||
|
||||
let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt");
|
||||
let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt");
|
||||
let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le");
|
||||
let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge");
|
||||
let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt");
|
||||
let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult");
|
||||
let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge");
|
||||
let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule");
|
||||
|
||||
// Inequalities that need to be reversed.
|
||||
for &(cc, rev_cc) in &[
|
||||
(floatcc_lt, floatcc_gt),
|
||||
(floatcc_le, floatcc_ge),
|
||||
(floatcc_ugt, floatcc_ult),
|
||||
(floatcc_uge, floatcc_ule),
|
||||
] {
|
||||
expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
|
||||
}
|
||||
|
||||
// We need to modify the CFG for min/max legalization.
|
||||
expand.custom_legalize(fmin, "expand_minmax");
|
||||
expand.custom_legalize(fmax, "expand_minmax");
|
||||
|
||||
// Conversions from unsigned need special handling.
|
||||
expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
|
||||
// Conversions from float to int can trap and modify the control flow graph.
|
||||
expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
|
||||
expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
|
||||
expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
|
||||
expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
|
||||
|
||||
// Count leading and trailing zeroes, for baseline x86_64
|
||||
let c_minus_one = var("c_minus_one");
|
||||
let c_thirty_one = var("c_thirty_one");
|
||||
let c_thirty_two = var("c_thirty_two");
|
||||
let c_sixty_three = var("c_sixty_three");
|
||||
let c_sixty_four = var("c_sixty_four");
|
||||
let index1 = var("index1");
|
||||
let r2flags = var("r2flags");
|
||||
let index2 = var("index2");
|
||||
|
||||
let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
|
||||
let imm64_minus_one = Literal::constant(&imm.imm64, -1);
|
||||
let imm64_63 = Literal::constant(&imm.imm64, 63);
|
||||
expand.legalize(
|
||||
def!(a = clz.I64(x)),
|
||||
vec![
|
||||
def!(c_minus_one = iconst(imm64_minus_one)),
|
||||
def!(c_sixty_three = iconst(imm64_63)),
|
||||
def!((index1, r2flags) = x86_bsr(x)),
|
||||
def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
|
||||
def!(a = isub(c_sixty_three, index2)),
|
||||
],
|
||||
);
|
||||
|
||||
let imm64_31 = Literal::constant(&imm.imm64, 31);
|
||||
expand.legalize(
|
||||
def!(a = clz.I32(x)),
|
||||
vec![
|
||||
def!(c_minus_one = iconst(imm64_minus_one)),
|
||||
def!(c_thirty_one = iconst(imm64_31)),
|
||||
def!((index1, r2flags) = x86_bsr(x)),
|
||||
def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
|
||||
def!(a = isub(c_thirty_one, index2)),
|
||||
],
|
||||
);
|
||||
|
||||
let imm64_64 = Literal::constant(&imm.imm64, 64);
|
||||
expand.legalize(
|
||||
def!(a = ctz.I64(x)),
|
||||
vec![
|
||||
def!(c_sixty_four = iconst(imm64_64)),
|
||||
def!((index1, r2flags) = x86_bsf(x)),
|
||||
def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)),
|
||||
],
|
||||
);
|
||||
|
||||
let imm64_32 = Literal::constant(&imm.imm64, 32);
|
||||
expand.legalize(
|
||||
def!(a = ctz.I32(x)),
|
||||
vec![
|
||||
def!(c_thirty_two = iconst(imm64_32)),
|
||||
def!((index1, r2flags) = x86_bsf(x)),
|
||||
def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)),
|
||||
],
|
||||
);
|
||||
|
||||
// Population count for baseline x86_64
|
||||
let x = var("x");
|
||||
let r = var("r");
|
||||
|
||||
let qv3 = var("qv3");
|
||||
let qv4 = var("qv4");
|
||||
let qv5 = var("qv5");
|
||||
let qv6 = var("qv6");
|
||||
let qv7 = var("qv7");
|
||||
let qv8 = var("qv8");
|
||||
let qv9 = var("qv9");
|
||||
let qv10 = var("qv10");
|
||||
let qv11 = var("qv11");
|
||||
let qv12 = var("qv12");
|
||||
let qv13 = var("qv13");
|
||||
let qv14 = var("qv14");
|
||||
let qv15 = var("qv15");
|
||||
let qc77 = var("qc77");
|
||||
#[allow(non_snake_case)]
|
||||
let qc0F = var("qc0F");
|
||||
let qc01 = var("qc01");
|
||||
|
||||
let imm64_1 = Literal::constant(&imm.imm64, 1);
|
||||
let imm64_4 = Literal::constant(&imm.imm64, 4);
|
||||
expand.legalize(
|
||||
def!(r = popcnt.I64(x)),
|
||||
vec![
|
||||
def!(qv3 = ushr_imm(x, imm64_1)),
|
||||
def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))),
|
||||
def!(qv4 = band(qv3, qc77)),
|
||||
def!(qv5 = isub(x, qv4)),
|
||||
def!(qv6 = ushr_imm(qv4, imm64_1)),
|
||||
def!(qv7 = band(qv6, qc77)),
|
||||
def!(qv8 = isub(qv5, qv7)),
|
||||
def!(qv9 = ushr_imm(qv7, imm64_1)),
|
||||
def!(qv10 = band(qv9, qc77)),
|
||||
def!(qv11 = isub(qv8, qv10)),
|
||||
def!(qv12 = ushr_imm(qv11, imm64_4)),
|
||||
def!(qv13 = iadd(qv11, qv12)),
|
||||
def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))),
|
||||
def!(qv14 = band(qv13, qc0F)),
|
||||
def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))),
|
||||
def!(qv15 = imul(qv14, qc01)),
|
||||
def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))),
|
||||
],
|
||||
);
|
||||
|
||||
let lv3 = var("lv3");
|
||||
let lv4 = var("lv4");
|
||||
let lv5 = var("lv5");
|
||||
let lv6 = var("lv6");
|
||||
let lv7 = var("lv7");
|
||||
let lv8 = var("lv8");
|
||||
let lv9 = var("lv9");
|
||||
let lv10 = var("lv10");
|
||||
let lv11 = var("lv11");
|
||||
let lv12 = var("lv12");
|
||||
let lv13 = var("lv13");
|
||||
let lv14 = var("lv14");
|
||||
let lv15 = var("lv15");
|
||||
let lc77 = var("lc77");
|
||||
#[allow(non_snake_case)]
|
||||
let lc0F = var("lc0F");
|
||||
let lc01 = var("lc01");
|
||||
|
||||
expand.legalize(
|
||||
def!(r = popcnt.I32(x)),
|
||||
vec![
|
||||
def!(lv3 = ushr_imm(x, imm64_1)),
|
||||
def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))),
|
||||
def!(lv4 = band(lv3, lc77)),
|
||||
def!(lv5 = isub(x, lv4)),
|
||||
def!(lv6 = ushr_imm(lv4, imm64_1)),
|
||||
def!(lv7 = band(lv6, lc77)),
|
||||
def!(lv8 = isub(lv5, lv7)),
|
||||
def!(lv9 = ushr_imm(lv7, imm64_1)),
|
||||
def!(lv10 = band(lv9, lc77)),
|
||||
def!(lv11 = isub(lv8, lv10)),
|
||||
def!(lv12 = ushr_imm(lv11, imm64_4)),
|
||||
def!(lv13 = iadd(lv11, lv12)),
|
||||
def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))),
|
||||
def!(lv14 = band(lv13, lc0F)),
|
||||
def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))),
|
||||
def!(lv15 = imul(lv14, lc01)),
|
||||
def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))),
|
||||
],
|
||||
);
|
||||
|
||||
expand.custom_legalize(ineg, "convert_ineg");
|
||||
expand.custom_legalize(tls_value, "expand_tls_value");
|
||||
widen.custom_legalize(ineg, "convert_ineg");
|
||||
|
||||
// To reduce compilation times, separate out large blocks of legalizations by theme.
|
||||
define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx);
|
||||
|
||||
expand.build_and_add_to(&mut shared.transform_groups);
|
||||
let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups);
|
||||
narrow_avx
|
||||
.chain_with(narrow_id)
|
||||
.build_and_add_to(&mut shared.transform_groups);
|
||||
widen.build_and_add_to(&mut shared.transform_groups);
|
||||
}
|
||||
|
||||
fn define_simd(
|
||||
shared: &mut SharedDefinitions,
|
||||
x86_instructions: &InstructionGroup,
|
||||
narrow: &mut TransformGroupBuilder,
|
||||
narrow_avx: &mut TransformGroupBuilder,
|
||||
) {
|
||||
let insts = &shared.instructions;
|
||||
let band = insts.by_name("band");
|
||||
let band_not = insts.by_name("band_not");
|
||||
let bitcast = insts.by_name("bitcast");
|
||||
let bitselect = insts.by_name("bitselect");
|
||||
let bor = insts.by_name("bor");
|
||||
let bnot = insts.by_name("bnot");
|
||||
let bxor = insts.by_name("bxor");
|
||||
let extractlane = insts.by_name("extractlane");
|
||||
let fabs = insts.by_name("fabs");
|
||||
let fcmp = insts.by_name("fcmp");
|
||||
let fcvt_from_uint = insts.by_name("fcvt_from_uint");
|
||||
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
|
||||
let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
|
||||
let fmax = insts.by_name("fmax");
|
||||
let fmin = insts.by_name("fmin");
|
||||
let fneg = insts.by_name("fneg");
|
||||
let iadd_imm = insts.by_name("iadd_imm");
|
||||
let icmp = insts.by_name("icmp");
|
||||
let imax = insts.by_name("imax");
|
||||
let imin = insts.by_name("imin");
|
||||
let imul = insts.by_name("imul");
|
||||
let ineg = insts.by_name("ineg");
|
||||
let insertlane = insts.by_name("insertlane");
|
||||
let ishl = insts.by_name("ishl");
|
||||
let ishl_imm = insts.by_name("ishl_imm");
|
||||
let raw_bitcast = insts.by_name("raw_bitcast");
|
||||
let scalar_to_vector = insts.by_name("scalar_to_vector");
|
||||
let splat = insts.by_name("splat");
|
||||
let shuffle = insts.by_name("shuffle");
|
||||
let sshr = insts.by_name("sshr");
|
||||
let swizzle = insts.by_name("swizzle");
|
||||
let trueif = insts.by_name("trueif");
|
||||
let uadd_sat = insts.by_name("uadd_sat");
|
||||
let umax = insts.by_name("umax");
|
||||
let umin = insts.by_name("umin");
|
||||
let snarrow = insts.by_name("snarrow");
|
||||
let swiden_high = insts.by_name("swiden_high");
|
||||
let swiden_low = insts.by_name("swiden_low");
|
||||
let ushr_imm = insts.by_name("ushr_imm");
|
||||
let ushr = insts.by_name("ushr");
|
||||
let uwiden_high = insts.by_name("uwiden_high");
|
||||
let uwiden_low = insts.by_name("uwiden_low");
|
||||
let vconst = insts.by_name("vconst");
|
||||
let vall_true = insts.by_name("vall_true");
|
||||
let vany_true = insts.by_name("vany_true");
|
||||
let vselect = insts.by_name("vselect");
|
||||
|
||||
let x86_palignr = x86_instructions.by_name("x86_palignr");
|
||||
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
|
||||
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
|
||||
let x86_pmins = x86_instructions.by_name("x86_pmins");
|
||||
let x86_pminu = x86_instructions.by_name("x86_pminu");
|
||||
let x86_pshufb = x86_instructions.by_name("x86_pshufb");
|
||||
let x86_pshufd = x86_instructions.by_name("x86_pshufd");
|
||||
let x86_psra = x86_instructions.by_name("x86_psra");
|
||||
let x86_ptest = x86_instructions.by_name("x86_ptest");
|
||||
let x86_punpckh = x86_instructions.by_name("x86_punpckh");
|
||||
let x86_punpckl = x86_instructions.by_name("x86_punpckl");
|
||||
|
||||
let imm = &shared.imm;
|
||||
|
||||
// Set up variables and immediates.
|
||||
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
|
||||
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
|
||||
let uimm8_eight = Literal::constant(&imm.uimm8, 8);
|
||||
let u128_zeroes = constant(vec![0x00; 16]);
|
||||
let u128_ones = constant(vec![0xff; 16]);
|
||||
let u128_seventies = constant(vec![0x70; 16]);
|
||||
let a = var("a");
|
||||
let b = var("b");
|
||||
let c = var("c");
|
||||
let d = var("d");
|
||||
let e = var("e");
|
||||
let f = var("f");
|
||||
let g = var("g");
|
||||
let h = var("h");
|
||||
let x = var("x");
|
||||
let y = var("y");
|
||||
let z = var("z");
|
||||
|
||||
// Limit the SIMD vector size: eventually multiple vector sizes may be supported
|
||||
// but for now only SSE-sized vectors are available.
|
||||
let sse_vector_size: u64 = 128;
|
||||
let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
|
||||
|
||||
// SIMD splat: 8-bits
|
||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
|
||||
let splat_any8x16 = splat.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = splat_any8x16(x)),
|
||||
vec![
|
||||
// Move into the lowest 8 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Zero out a different XMM register; the shuffle mask for moving the lowest byte
|
||||
// to all other byte lanes is 0x0.
|
||||
def!(b = vconst(u128_zeroes)),
|
||||
// PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b).
|
||||
def!(y = x86_pshufb(a, b)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD splat: 16-bits
|
||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
|
||||
let splat_x16x8 = splat.bind(vector(ty, sse_vector_size));
|
||||
let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
|
||||
.bind(vector(I32, sse_vector_size))
|
||||
.bind(vector(ty, sse_vector_size));
|
||||
let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
|
||||
.bind(vector(ty, sse_vector_size))
|
||||
.bind(vector(I32, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = splat_x16x8(x)),
|
||||
vec![
|
||||
// Move into the lowest 16 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Insert the value again but in the next lowest 16 bits.
|
||||
def!(b = insertlane(a, x, uimm8_one)),
|
||||
// No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
|
||||
def!(c = raw_bitcast_any16x8_to_i32x4(b)),
|
||||
// Broadcast the bytes in the XMM register with PSHUFD.
|
||||
def!(d = x86_pshufd(c, uimm8_zero)),
|
||||
// No instruction emitted; pretend this is an X16x8 again.
|
||||
def!(y = raw_bitcast_i32x4_to_any16x8(d)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD splat: 32-bits
|
||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
|
||||
let splat_any32x4 = splat.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = splat_any32x4(x)),
|
||||
vec![
|
||||
// Translate to an x86 MOV to get the value in an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Broadcast the bytes in the XMM register with PSHUFD.
|
||||
def!(y = x86_pshufd(a, uimm8_zero)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD splat: 64-bits
|
||||
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
|
||||
let splat_any64x2 = splat.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = splat_any64x2(x)),
|
||||
vec![
|
||||
// Move into the lowest 64 bits of an XMM register.
|
||||
def!(a = scalar_to_vector(x)),
|
||||
// Move into the highest 64 bits of the same XMM register.
|
||||
def!(y = insertlane(a, x, uimm8_one)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring
|
||||
// mask indexes greater than 15 to have the same semantics as a 0 index. For the spec discussion,
|
||||
// see https://github.com/WebAssembly/simd/issues/93.
|
||||
{
|
||||
let swizzle = swizzle.bind(vector(I8, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(a = swizzle(x, y)),
|
||||
vec![
|
||||
def!(b = vconst(u128_seventies)),
|
||||
def!(c = uadd_sat(y, b)),
|
||||
def!(a = x86_pshufb(x, c)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD bnot
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let bnot = bnot.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = bnot(x)),
|
||||
vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD shift right (arithmetic, i16x8 and i32x4)
|
||||
for ty in &[I16, I32] {
|
||||
let sshr = sshr.bind(vector(*ty, sse_vector_size));
|
||||
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(a = sshr(x, y)),
|
||||
vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
|
||||
);
|
||||
}
|
||||
// SIMD shift right (arithmetic, i8x16)
|
||||
{
|
||||
let sshr = sshr.bind(vector(I8, sse_vector_size));
|
||||
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
|
||||
let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size));
|
||||
let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(z = sshr(x, y)),
|
||||
vec![
|
||||
// Since we will use the high byte of each 16x8 lane, shift an extra 8 bits.
|
||||
def!(a = iadd_imm(y, uimm8_eight)),
|
||||
def!(b = bitcast_i64x2(a)),
|
||||
// Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
|
||||
def!(c = x86_punpckl(x, x)),
|
||||
def!(d = raw_bitcast_i16x8(c)),
|
||||
def!(e = x86_psra(d, b)),
|
||||
// Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
|
||||
def!(f = x86_punpckh(x, x)),
|
||||
def!(g = raw_bitcast_i16x8_again(f)),
|
||||
def!(h = x86_psra(g, b)),
|
||||
// Re-pack the vector.
|
||||
def!(z = snarrow(e, h)),
|
||||
],
|
||||
);
|
||||
}
|
||||
// SIMD shift right (arithmetic, i64x2)
|
||||
{
|
||||
let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
|
||||
let sshr_scalar_lane0 = sshr.bind(I64);
|
||||
let sshr_scalar_lane1 = sshr.bind(I64);
|
||||
narrow.legalize(
|
||||
def!(z = sshr_vector(x, y)),
|
||||
vec![
|
||||
// Use scalar operations to shift the first lane.
|
||||
def!(a = extractlane(x, uimm8_zero)),
|
||||
def!(b = sshr_scalar_lane0(a, y)),
|
||||
def!(c = insertlane(x, b, uimm8_zero)),
|
||||
// Do the same for the second lane.
|
||||
def!(d = extractlane(x, uimm8_one)),
|
||||
def!(e = sshr_scalar_lane1(d, y)),
|
||||
def!(z = insertlane(c, e, uimm8_one)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD select
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c
|
||||
narrow.legalize(
|
||||
def!(d = bitselect(c, x, y)),
|
||||
vec![
|
||||
def!(a = band(x, c)),
|
||||
def!(b = band_not(y, c)),
|
||||
def!(d = bor(a, b)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD vselect; replace with bitselect if BLEND* instructions are not available.
|
||||
// This works, because each lane of boolean vector is filled with zeroes or ones.
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let vselect = vselect.bind(vector(ty, sse_vector_size));
|
||||
let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(d = vselect(c, x, y)),
|
||||
vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD vany_true
|
||||
let ne = Literal::enumerator_for(&imm.intcc, "ne");
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let vany_true = vany_true.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = vany_true(x)),
|
||||
vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD vall_true
|
||||
let eq = Literal::enumerator_for(&imm.intcc, "eq");
|
||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||
let vall_true = vall_true.bind(vector(ty, sse_vector_size));
|
||||
if ty.is_int() {
|
||||
// In the common case (Wasm's integer-only all_true), we do not require a
|
||||
// bitcast.
|
||||
narrow.legalize(
|
||||
def!(y = vall_true(x)),
|
||||
vec![
|
||||
def!(a = vconst(u128_zeroes)),
|
||||
def!(c = icmp(eq, x, a)),
|
||||
def!(d = x86_ptest(c, c)),
|
||||
def!(y = trueif(eq, d)),
|
||||
],
|
||||
);
|
||||
} else {
|
||||
// However, to support other types we must bitcast them to an integer vector to
|
||||
// use icmp.
|
||||
let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16);
|
||||
let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(y = vall_true(x)),
|
||||
vec![
|
||||
def!(a = vconst(u128_zeroes)),
|
||||
def!(b = raw_bitcast_to_int(x)),
|
||||
def!(c = icmp(eq, b, a)),
|
||||
def!(d = x86_ptest(c, c)),
|
||||
def!(y = trueif(eq, d)),
|
||||
],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// SIMD icmp ne
|
||||
let ne = Literal::enumerator_for(&imm.intcc, "ne");
|
||||
for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) {
|
||||
let icmp_ = icmp.bind(vector(ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(c = icmp_(ne, a, b)),
|
||||
vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD icmp greater-/less-than
|
||||
let sgt = Literal::enumerator_for(&imm.intcc, "sgt");
|
||||
let ugt = Literal::enumerator_for(&imm.intcc, "ugt");
|
||||
let sge = Literal::enumerator_for(&imm.intcc, "sge");
|
||||
let uge = Literal::enumerator_for(&imm.intcc, "uge");
|
||||
let slt = Literal::enumerator_for(&imm.intcc, "slt");
|
||||
let ult = Literal::enumerator_for(&imm.intcc, "ult");
|
||||
let sle = Literal::enumerator_for(&imm.intcc, "sle");
|
||||
let ule = Literal::enumerator_for(&imm.intcc, "ule");
|
||||
for ty in &[I8, I16, I32] {
|
||||
// greater-than
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(c = icmp_(ugt, a, b)),
|
||||
vec![
|
||||
def!(x = x86_pmaxu(a, b)),
|
||||
def!(y = icmp(eq, x, b)),
|
||||
def!(c = bnot(y)),
|
||||
],
|
||||
);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(c = icmp_(sge, a, b)),
|
||||
vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))],
|
||||
);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(c = icmp_(uge, a, b)),
|
||||
vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))],
|
||||
);
|
||||
|
||||
// less-than
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]);
|
||||
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]);
|
||||
}
|
||||
|
||||
// SIMD integer min/max
|
||||
for ty in &[I8, I16, I32] {
|
||||
let imin = imin.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]);
|
||||
let umin = umin.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]);
|
||||
let imax = imax.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]);
|
||||
let umax = umax.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]);
|
||||
}
|
||||
|
||||
// SIMD fcmp greater-/less-than
|
||||
let gt = Literal::enumerator_for(&imm.floatcc, "gt");
|
||||
let lt = Literal::enumerator_for(&imm.floatcc, "lt");
|
||||
let ge = Literal::enumerator_for(&imm.floatcc, "ge");
|
||||
let le = Literal::enumerator_for(&imm.floatcc, "le");
|
||||
let ugt = Literal::enumerator_for(&imm.floatcc, "ugt");
|
||||
let ult = Literal::enumerator_for(&imm.floatcc, "ult");
|
||||
let uge = Literal::enumerator_for(&imm.floatcc, "uge");
|
||||
let ule = Literal::enumerator_for(&imm.floatcc, "ule");
|
||||
for ty in &[F32, F64] {
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]);
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]);
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]);
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]);
|
||||
}
|
||||
|
||||
for ty in &[F32, F64] {
|
||||
let fneg = fneg.bind(vector(*ty, sse_vector_size));
|
||||
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
|
||||
let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1);
|
||||
let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
|
||||
let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = fneg(a)),
|
||||
vec![
|
||||
def!(c = vconst(u128_ones)),
|
||||
def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB.
|
||||
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
|
||||
def!(b = bxor(a, e)), // Flip the MSB.
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD fabs
|
||||
for ty in &[F32, F64] {
|
||||
let fabs = fabs.bind(vector(*ty, sse_vector_size));
|
||||
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
|
||||
let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
|
||||
let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = fabs(a)),
|
||||
vec![
|
||||
def!(c = vconst(u128_ones)),
|
||||
def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
|
||||
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
|
||||
def!(b = band(a, e)), // Unset the MSB.
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
// SIMD widen
|
||||
for ty in &[I8, I16] {
|
||||
let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = swiden_high(a)),
|
||||
vec![
|
||||
def!(c = x86_palignr(a, a, uimm8_eight)),
|
||||
def!(b = swiden_low(c)),
|
||||
],
|
||||
);
|
||||
let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(
|
||||
def!(b = uwiden_high(a)),
|
||||
vec![
|
||||
def!(c = x86_palignr(a, a, uimm8_eight)),
|
||||
def!(b = uwiden_low(c)),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
||||
narrow.custom_legalize(shuffle, "convert_shuffle");
|
||||
narrow.custom_legalize(extractlane, "convert_extractlane");
|
||||
narrow.custom_legalize(insertlane, "convert_insertlane");
|
||||
narrow.custom_legalize(ineg, "convert_ineg");
|
||||
narrow.custom_legalize(ushr, "convert_ushr");
|
||||
narrow.custom_legalize(ishl, "convert_ishl");
|
||||
narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
|
||||
narrow.custom_legalize(fmin, "expand_minmax_vector");
|
||||
narrow.custom_legalize(fmax, "expand_minmax_vector");
|
||||
|
||||
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
|
||||
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
|
||||
narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector");
|
||||
}
|
||||
@@ -1,87 +1,25 @@
|
||||
use crate::cdsl::cpu_modes::CpuMode;
|
||||
use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
|
||||
use crate::cdsl::isa::TargetIsa;
|
||||
use crate::cdsl::types::{ReferenceType, VectorType};
|
||||
use crate::cdsl::recipes::Recipes;
|
||||
use crate::cdsl::regs::IsaRegsBuilder;
|
||||
|
||||
use crate::shared::types::Bool::B1;
|
||||
use crate::shared::types::Float::{F32, F64};
|
||||
use crate::shared::types::Int::{I16, I32, I64, I8};
|
||||
use crate::shared::types::Reference::{R32, R64};
|
||||
use crate::shared::Definitions as SharedDefinitions;
|
||||
|
||||
mod encodings;
|
||||
mod instructions;
|
||||
mod legalize;
|
||||
mod opcodes;
|
||||
mod recipes;
|
||||
mod registers;
|
||||
pub(crate) mod settings;
|
||||
|
||||
pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
|
||||
let settings = settings::define(&shared_defs.settings);
|
||||
let regs = registers::define();
|
||||
|
||||
let inst_group = instructions::define(
|
||||
&mut shared_defs.all_instructions,
|
||||
&shared_defs.formats,
|
||||
&shared_defs.imm,
|
||||
&shared_defs.entities,
|
||||
);
|
||||
legalize::define(shared_defs, &inst_group);
|
||||
let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
|
||||
|
||||
// CPU modes for 32-bit and 64-bit operations.
|
||||
let mut x86_64 = CpuMode::new("I64");
|
||||
let mut x86_32 = CpuMode::new("I32");
|
||||
|
||||
let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
|
||||
let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
|
||||
let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
|
||||
let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx");
|
||||
let x86_expand = shared_defs.transform_groups.by_name("x86_expand");
|
||||
|
||||
x86_32.legalize_monomorphic(expand_flags);
|
||||
x86_32.legalize_default(x86_narrow);
|
||||
x86_32.legalize_type(B1, expand_flags);
|
||||
x86_32.legalize_type(I8, x86_widen);
|
||||
x86_32.legalize_type(I16, x86_widen);
|
||||
x86_32.legalize_type(I32, x86_expand);
|
||||
x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
|
||||
x86_32.legalize_type(F32, x86_expand);
|
||||
x86_32.legalize_type(F64, x86_expand);
|
||||
x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
|
||||
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
|
||||
x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
|
||||
|
||||
x86_64.legalize_monomorphic(expand_flags);
|
||||
x86_64.legalize_default(x86_narrow);
|
||||
x86_64.legalize_type(B1, expand_flags);
|
||||
x86_64.legalize_type(I8, x86_widen);
|
||||
x86_64.legalize_type(I16, x86_widen);
|
||||
x86_64.legalize_type(I32, x86_expand);
|
||||
x86_64.legalize_type(I64, x86_expand);
|
||||
x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
|
||||
x86_64.legalize_type(F32, x86_expand);
|
||||
x86_64.legalize_type(F64, x86_expand);
|
||||
x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
|
||||
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
|
||||
x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
|
||||
|
||||
let recipes = recipes::define(shared_defs, &settings, ®s);
|
||||
|
||||
let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes);
|
||||
x86_32.set_encodings(encodings.enc32);
|
||||
x86_64.set_encodings(encodings.enc64);
|
||||
let encodings_predicates = encodings.inst_pred_reg.extract();
|
||||
|
||||
let recipes = encodings.recipes;
|
||||
|
||||
let cpu_modes = vec![x86_64, x86_32];
|
||||
let cpu_modes = vec![];
|
||||
|
||||
TargetIsa::new(
|
||||
"x86",
|
||||
settings,
|
||||
regs,
|
||||
recipes,
|
||||
IsaRegsBuilder::new().build(),
|
||||
Recipes::new(),
|
||||
cpu_modes,
|
||||
encodings_predicates,
|
||||
InstructionPredicateMap::new(),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,721 +0,0 @@
|
||||
//! Static, named definitions of instruction opcodes.
|
||||
|
||||
/// Empty opcode for use as a default.
|
||||
pub static EMPTY: [u8; 0] = [];
|
||||
|
||||
/// Add with carry flag r{16,32,64} to r/m of the same size.
|
||||
pub static ADC: [u8; 1] = [0x11];
|
||||
|
||||
/// Add r{16,32,64} to r/m of the same size.
|
||||
pub static ADD: [u8; 1] = [0x01];
|
||||
|
||||
/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended.
|
||||
pub static ADD_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// Add sign-extended imm8 to r/m{16,32,64}.
|
||||
pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
|
||||
|
||||
/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in
|
||||
/// xmm1 (SSE2).
|
||||
pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58];
|
||||
|
||||
/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in
|
||||
/// xmm1 (SSE).
|
||||
pub static ADDPS: [u8; 2] = [0x0f, 0x58];
|
||||
|
||||
/// Add the low double-precision floating-point value from xmm2/mem to xmm1
|
||||
/// and store the result in xmm1.
|
||||
pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58];
|
||||
|
||||
/// Add the low single-precision floating-point value from xmm2/mem to xmm1
|
||||
/// and store the result in xmm1.
|
||||
pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58];
|
||||
|
||||
/// r/m{16,32,64} AND register of the same size (Intel docs have a typo).
|
||||
pub static AND: [u8; 1] = [0x21];
|
||||
|
||||
/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended.
|
||||
pub static AND_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// r/m{16,32,64} AND sign-extended imm8.
|
||||
pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
|
||||
|
||||
/// Return the bitwise logical AND NOT of packed single-precision floating-point
|
||||
/// values in xmm1 and xmm2/mem.
|
||||
pub static ANDNPS: [u8; 2] = [0x0f, 0x55];
|
||||
|
||||
/// Return the bitwise logical AND of packed single-precision floating-point values
|
||||
/// in xmm1 and xmm2/mem.
|
||||
pub static ANDPS: [u8; 2] = [0x0f, 0x54];
|
||||
|
||||
/// Bit scan forward (stores index of first encountered 1 from the front).
|
||||
pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
|
||||
|
||||
/// Bit scan reverse (stores index of first encountered 1 from the back).
|
||||
pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];
|
||||
|
||||
/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
|
||||
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
|
||||
pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];
|
||||
|
||||
/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
|
||||
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
|
||||
pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];
|
||||
|
||||
/// Call near, relative, displacement relative to next instruction (sign-extended).
|
||||
pub static CALL_RELATIVE: [u8; 1] = [0xe8];
|
||||
|
||||
/// Move r/m{16,32,64} if overflow (OF=1).
|
||||
pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40];
|
||||
|
||||
/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64).
|
||||
pub static CMP_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// Compare imm8 with r/m{16,32,64}.
|
||||
pub static CMP_IMM8: [u8; 1] = [0x83];
|
||||
|
||||
/// Compare r{16,32,64} with r/m of the same size.
|
||||
pub static CMP_REG: [u8; 1] = [0x39];
|
||||
|
||||
/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
|
||||
/// imm8 as comparison predicate (SSE2).
|
||||
pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2];
|
||||
|
||||
/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
|
||||
/// imm8 as comparison predicate (SSE).
|
||||
pub static CMPPS: [u8; 2] = [0x0f, 0xc2];
|
||||
|
||||
/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision
|
||||
/// floating-point values in xmm1 (SSE2).
|
||||
pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b];
|
||||
|
||||
/// Convert scalar double-precision floating-point value to scalar single-precision
|
||||
/// floating-point value.
|
||||
pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a];
|
||||
|
||||
/// Convert doubleword integer to scalar double-precision floating-point value.
|
||||
pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a];
|
||||
|
||||
/// Convert doubleword integer to scalar single-precision floating-point value.
|
||||
pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a];
|
||||
|
||||
/// Convert scalar single-precision floating-point value to scalar double-precision
|
||||
/// floating-point value.
|
||||
pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a];
|
||||
|
||||
/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed
|
||||
/// doubleword values in xmm1 using truncation (SSE2).
|
||||
pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b];
|
||||
|
||||
/// Convert with truncation scalar double-precision floating-point value to signed
|
||||
/// integer.
|
||||
pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c];
|
||||
|
||||
/// Convert with truncation scalar single-precision floating-point value to integer.
|
||||
pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c];
|
||||
|
||||
/// Unsigned divide for {16,32,64}-bit.
|
||||
pub static DIV: [u8; 1] = [0xf7];
|
||||
|
||||
/// Divide packed double-precision floating-point values in xmm1 by packed double-precision
|
||||
/// floating-point values in xmm2/mem (SSE2).
|
||||
pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e];
|
||||
|
||||
/// Divide packed single-precision floating-point values in xmm1 by packed single-precision
|
||||
/// floating-point values in xmm2/mem (SSE).
|
||||
pub static DIVPS: [u8; 2] = [0x0f, 0x5e];
|
||||
|
||||
/// Divide low double-precision floating-point value in xmm1 by low double-precision
|
||||
/// floating-point value in xmm2/m64.
|
||||
pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e];
|
||||
|
||||
/// Divide low single-precision floating-point value in xmm1 by low single-precision
|
||||
/// floating-point value in xmm2/m32.
|
||||
pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e];
|
||||
|
||||
/// Signed divide for {16,32,64}-bit.
|
||||
pub static IDIV: [u8; 1] = [0xf7];
|
||||
|
||||
/// Signed multiply for {16,32,64}-bit, generic registers.
|
||||
pub static IMUL: [u8; 2] = [0x0f, 0xaf];
|
||||
|
||||
/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX.
|
||||
pub static IMUL_RDX_RAX: [u8; 1] = [0xf7];
|
||||
|
||||
/// Insert scalar single-precision floating-point value.
|
||||
pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21];
|
||||
|
||||
/// Either:
|
||||
/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory.
|
||||
/// 2. Jump far, absolute indirect, address given in m16:64.
|
||||
pub static JUMP_ABSOLUTE: [u8; 1] = [0xff];
|
||||
|
||||
/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits.
|
||||
pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9];
|
||||
|
||||
/// Jump near (rel32) if overflow (OF=1).
|
||||
pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80];
|
||||
|
||||
/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits.
|
||||
pub static JUMP_SHORT: [u8; 1] = [0xeb];
|
||||
|
||||
/// Jump short (rel8) if equal (ZF=1).
|
||||
pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74];
|
||||
|
||||
/// Jump short (rel8) if not equal (ZF=0).
|
||||
pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75];
|
||||
|
||||
/// Jump short (rel8) if overflow (OF=1).
|
||||
pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70];
|
||||
|
||||
/// Store effective address for m in register r{16,32,64}.
|
||||
pub static LEA: [u8; 1] = [0x8d];
|
||||
|
||||
/// Count the number of leading zero bits.
|
||||
pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd];
|
||||
|
||||
/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128
|
||||
/// (SSE2).
|
||||
pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f];
|
||||
|
||||
/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128
|
||||
/// (SSE).
|
||||
pub static MAXPS: [u8; 2] = [0x0f, 0x5f];
|
||||
|
||||
/// Return the maximum scalar double-precision floating-point value between
|
||||
/// xmm2/m64 and xmm1.
|
||||
pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f];
|
||||
|
||||
/// Return the maximum scalar single-precision floating-point value between
|
||||
/// xmm2/m32 and xmm1.
|
||||
pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f];
|
||||
|
||||
/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128
|
||||
/// (SSE2).
|
||||
pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d];
|
||||
|
||||
/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128
|
||||
/// (SSE).
|
||||
pub static MINPS: [u8; 2] = [0x0f, 0x5d];
|
||||
|
||||
/// Return the minimum scalar double-precision floating-point value between
|
||||
/// xmm2/m64 and xmm1.
|
||||
pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d];
|
||||
|
||||
/// Return the minimum scalar single-precision floating-point value between
|
||||
/// xmm2/m32 and xmm1.
|
||||
pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d];
|
||||
|
||||
/// Move r8 to r/m8.
|
||||
pub static MOV_BYTE_STORE: [u8; 1] = [0x88];
|
||||
|
||||
/// Move imm{16,32,64} to same-sized register.
|
||||
pub static MOV_IMM: [u8; 1] = [0xb8];
|
||||
|
||||
/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target.
|
||||
pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7];
|
||||
|
||||
/// Move {r/m16, r/m32, r/m64} to same-sized register.
|
||||
pub static MOV_LOAD: [u8; 1] = [0x8b];
|
||||
|
||||
/// Move r16 to r/m16.
|
||||
pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89];
|
||||
|
||||
/// Move {r16, r32, r64} to same-sized register or memory.
|
||||
pub static MOV_STORE: [u8; 1] = [0x89];
|
||||
|
||||
/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE).
|
||||
pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28];
|
||||
|
||||
/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix.
|
||||
pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e];
|
||||
|
||||
/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix.
|
||||
pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e];
|
||||
|
||||
/// Move packed single-precision floating-point values low to high (SSE).
|
||||
pub static MOVLHPS: [u8; 2] = [0x0f, 0x16];
|
||||
|
||||
/// Move scalar double-precision floating-point value (from reg/mem to reg).
|
||||
pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10];
|
||||
|
||||
/// Move scalar double-precision floating-point value (from reg to reg/mem).
|
||||
pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11];
|
||||
|
||||
/// Move scalar single-precision floating-point value (from reg to reg/mem).
|
||||
pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11];
|
||||
|
||||
/// Move scalar single-precision floating-point value (from reg/mem to reg).
|
||||
pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10];
|
||||
|
||||
/// Move byte to register with sign-extension.
|
||||
pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe];
|
||||
|
||||
/// Move word to register with sign-extension.
|
||||
pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf];
|
||||
|
||||
/// Move doubleword to register with sign-extension.
|
||||
pub static MOVSXD: [u8; 1] = [0x63];
|
||||
|
||||
/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE).
|
||||
pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10];
|
||||
|
||||
/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE).
|
||||
pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11];
|
||||
|
||||
/// Move byte to register with zero-extension.
|
||||
pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6];
|
||||
|
||||
/// Move word to register with zero-extension.
|
||||
pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7];
|
||||
|
||||
/// Unsigned multiply for {16,32,64}-bit.
|
||||
pub static MUL: [u8; 1] = [0xf7];
|
||||
|
||||
/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result
|
||||
/// in xmm1 (SSE2).
|
||||
pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59];
|
||||
|
||||
/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result
|
||||
/// in xmm1 (SSE).
|
||||
pub static MULPS: [u8; 2] = [0x0f, 0x59];
|
||||
|
||||
/// Multiply the low double-precision floating-point value in xmm2/m64 by the
|
||||
/// low double-precision floating-point value in xmm1.
|
||||
pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59];
|
||||
|
||||
/// Multiply the low single-precision floating-point value in xmm2/m32 by the
|
||||
/// low single-precision floating-point value in xmm1.
|
||||
pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59];
|
||||
|
||||
/// Reverse each bit of r/m{16,32,64}.
|
||||
pub static NOT: [u8; 1] = [0xf7];
|
||||
|
||||
/// r{16,32,64} OR register of same size.
|
||||
pub static OR: [u8; 1] = [0x09];
|
||||
|
||||
/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended.
|
||||
pub static OR_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// r/m{16,32,64} OR sign-extended imm8.
|
||||
pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
|
||||
|
||||
/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE).
|
||||
pub static ORPS: [u8; 2] = [0x0f, 0x56];
|
||||
|
||||
/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3).
|
||||
pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c];
|
||||
|
||||
/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in
|
||||
/// xmm1 (SSSE3).
|
||||
pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
|
||||
|
||||
/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in
|
||||
/// xmm1 (SSSE3).
|
||||
pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
|
||||
|
||||
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
|
||||
/// integers in xmm1 using signed saturation (SSE2).
|
||||
pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
|
||||
|
||||
/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed
|
||||
/// word integers in xmm1 using signed saturation (SSE2).
|
||||
pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b];
|
||||
|
||||
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte
|
||||
/// integers in xmm1 using unsigned saturation (SSE2).
|
||||
pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67];
|
||||
|
||||
/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed unsigned
|
||||
/// word integers in xmm1 using unsigned saturation (SSE4.1).
|
||||
pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b];
|
||||
|
||||
/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
|
||||
|
||||
/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe];
|
||||
|
||||
/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4];
|
||||
|
||||
/// Add packed word integers from xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd];
|
||||
|
||||
/// Add packed signed byte integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
|
||||
pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec];
|
||||
|
||||
/// Add packed signed word integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
|
||||
pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed];
|
||||
|
||||
/// Add packed unsigned byte integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
|
||||
pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
|
||||
|
||||
/// Add packed unsigned word integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
|
||||
pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
|
||||
|
||||
/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
|
||||
/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
|
||||
pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
|
||||
|
||||
/// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
|
||||
|
||||
/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2).
|
||||
pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf];
|
||||
|
||||
/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2).
|
||||
pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0];
|
||||
|
||||
/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
|
||||
pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];
|
||||
|
||||
/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
|
||||
/// in XMM0 and store the values into xmm1 (SSE4.1).
|
||||
pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];
|
||||
|
||||
/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1
|
||||
/// (SSE4.1).
|
||||
pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e];
|
||||
|
||||
/// Compare packed data for equal (SSE2).
|
||||
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
|
||||
|
||||
/// Compare packed data for equal (SSE2).
|
||||
pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76];
|
||||
|
||||
/// Compare packed data for equal (SSE4.1).
|
||||
pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
|
||||
|
||||
/// Compare packed data for equal (SSE2).
|
||||
pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
|
||||
|
||||
/// Compare packed signed byte integers for greater than (SSE2).
|
||||
pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64];
|
||||
|
||||
/// Compare packed signed doubleword integers for greater than (SSE2).
|
||||
pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66];
|
||||
|
||||
/// Compare packed signed quadword integers for greater than (SSE4.2).
|
||||
pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37];
|
||||
|
||||
/// Compare packed signed word integers for greater than (SSE2).
|
||||
pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65];
|
||||
|
||||
/// Extract doubleword or quadword, depending on REX.W (SSE4.1).
|
||||
pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
|
||||
|
||||
/// Extract byte (SSE4.1).
|
||||
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
|
||||
|
||||
/// Extract word (SSE4.1); can also store to m16. There is a 3-byte SSE2 variant that can only move to a register.
|
||||
pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];
|
||||
|
||||
/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
|
||||
pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
|
||||
|
||||
/// Insert byte (SSE4.1).
|
||||
pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
|
||||
|
||||
/// Insert word (SSE2).
|
||||
pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
|
||||
|
||||
/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||
/// xmm1 (SSE4.1).
|
||||
pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c];
|
||||
|
||||
/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum
|
||||
/// values in xmm1 (SSE4.1).
|
||||
pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d];
|
||||
|
||||
/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||
/// xmm1 (SSE2).
|
||||
pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee];
|
||||
|
||||
/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||
/// xmm1 (SSE2).
|
||||
pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde];
|
||||
|
||||
/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum
|
||||
/// values in xmm1 (SSE4.1).
|
||||
pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f];
|
||||
|
||||
/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||
/// xmm1 (SSE4.1).
|
||||
pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e];
|
||||
|
||||
/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||
/// xmm1 (SSE4.1).
|
||||
pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38];
|
||||
|
||||
/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum
|
||||
/// values in xmm1 (SSE4.1).
|
||||
pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39];
|
||||
|
||||
/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||
/// xmm1 (SSE2).
|
||||
pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea];
|
||||
|
||||
/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||
/// xmm1 (SSE2).
|
||||
pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda];
|
||||
|
||||
/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum
|
||||
/// values in xmm1 (SSE4.1).
|
||||
pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b];
|
||||
|
||||
/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||
/// xmm1 (SSE4.1).
|
||||
pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a];
|
||||
|
||||
/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
|
||||
|
||||
/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
|
||||
|
||||
/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
|
||||
|
||||
/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
|
||||
|
||||
/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
|
||||
|
||||
/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
|
||||
/// integers in xmm1 (SSE4.1).
|
||||
pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
|
||||
|
||||
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
|
||||
/// the results in xmm1 (SSE2).
|
||||
pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
|
||||
|
||||
/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32
|
||||
/// bits of each product in xmm1 (SSE4.1).
|
||||
pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
|
||||
|
||||
/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
|
||||
/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
|
||||
pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
|
||||
|
||||
/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
|
||||
/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
|
||||
pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
|
||||
|
||||
/// Multiply the packed word integers, add adjacent doubleword results.
|
||||
pub static PMADDWD: [u8; 3] = [0x66, 0x0f, 0xf5];
|
||||
|
||||
/// Pop top of stack into r{16,32,64}; increment stack pointer.
|
||||
pub static POP_REG: [u8; 1] = [0x58];
|
||||
|
||||
/// Returns the count of number of bits set to 1.
|
||||
pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8];
|
||||
|
||||
/// Bitwise OR of xmm2/m128 and xmm1 (SSE2).
|
||||
pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb];
|
||||
|
||||
/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSSE3).
|
||||
pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
|
||||
|
||||
/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and
|
||||
/// store the result in xmm1 (SSE2).
|
||||
pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
|
||||
|
||||
/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
|
||||
/// digit used in the ModR/M byte (SSE2).
|
||||
pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71];
|
||||
|
||||
/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
|
||||
/// digit used in the ModR/M byte (SSE2).
|
||||
pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72];
|
||||
|
||||
/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
|
||||
/// digit used in the ModR/M byte (SSE2).
|
||||
pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73];
|
||||
|
||||
/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];
|
||||
|
||||
/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2];
|
||||
|
||||
/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3];
|
||||
|
||||
/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1];
|
||||
|
||||
/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2];
|
||||
|
||||
/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
|
||||
pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3];
|
||||
|
||||
/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
|
||||
pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1];
|
||||
|
||||
/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
|
||||
pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2];
|
||||
|
||||
/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2).
|
||||
pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8];
|
||||
|
||||
/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2).
|
||||
pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9];
|
||||
|
||||
/// Subtract packed doubleword integers in xmm2/m128 from packed doubleword integers in xmm1 (SSE2).
|
||||
pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa];
|
||||
|
||||
/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2).
|
||||
pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb];
|
||||
|
||||
/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1
|
||||
/// and saturate results (SSE2).
|
||||
pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8];
|
||||
|
||||
/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1
|
||||
/// and saturate results (SSE2).
|
||||
pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9];
|
||||
|
||||
/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1
|
||||
/// and saturate results (SSE2).
|
||||
pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
|
||||
|
||||
/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1
|
||||
/// and saturate results (SSE2).
|
||||
pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
|
||||
|
||||
/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
|
||||
/// 0s (SSE4.1).
|
||||
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
|
||||
|
||||
/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68];
|
||||
|
||||
/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69];
|
||||
|
||||
/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A];
|
||||
|
||||
/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D];
|
||||
|
||||
/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60];
|
||||
|
||||
/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61];
|
||||
|
||||
/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62];
|
||||
|
||||
/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
|
||||
pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C];
|
||||
|
||||
/// Push r{16,32,64}.
|
||||
pub static PUSH_REG: [u8; 1] = [0x50];
|
||||
|
||||
/// Logical exclusive OR (SSE2).
|
||||
pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef];
|
||||
|
||||
/// Near return to calling procedure.
|
||||
pub static RET_NEAR: [u8; 1] = [0xc3];
|
||||
|
||||
/// General rotation opcode. Kind of rotation depends on encoding.
|
||||
pub static ROTATE_CL: [u8; 1] = [0xd3];
|
||||
|
||||
/// General rotation opcode. Kind of rotation depends on encoding.
|
||||
pub static ROTATE_IMM8: [u8; 1] = [0xc1];
|
||||
|
||||
/// Round scalar double-precision floating-point values.
|
||||
pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b];
|
||||
|
||||
/// Round scalar single-precision floating-point values.
|
||||
pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a];
|
||||
|
||||
/// Subtract with borrow r{16,32,64} from r/m of the same size.
|
||||
pub static SBB: [u8; 1] = [0x19];
|
||||
|
||||
/// Set byte if overflow (OF=1).
|
||||
pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90];
|
||||
|
||||
/// Compute the square root of the packed double-precision floating-point values and store the
|
||||
/// result in xmm1 (SSE2).
|
||||
pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51];
|
||||
|
||||
/// Compute the square root of the packed single-precision floating-point values and store the
|
||||
/// result in xmm1 (SSE).
|
||||
pub static SQRTPS: [u8; 2] = [0x0f, 0x51];
|
||||
|
||||
/// Compute square root of scalar double-precision floating-point value.
|
||||
pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51];
|
||||
|
||||
/// Compute square root of scalar single-precision value.
|
||||
pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51];
|
||||
|
||||
/// Subtract r{16,32,64} from r/m of same size.
|
||||
pub static SUB: [u8; 1] = [0x29];
|
||||
|
||||
/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result
|
||||
/// in xmm1 (SSE2).
|
||||
pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c];
|
||||
|
||||
/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result
|
||||
/// in xmm1 (SSE).
|
||||
pub static SUBPS: [u8; 2] = [0x0f, 0x5c];
|
||||
|
||||
/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1
|
||||
/// and store the result in xmm1.
|
||||
pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c];
|
||||
|
||||
/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1
|
||||
/// and store the result in xmm1.
|
||||
pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c];
|
||||
|
||||
/// AND r8 with r/m8; set SF, ZF, PF according to result.
|
||||
pub static TEST_BYTE_REG: [u8; 1] = [0x84];
|
||||
|
||||
/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result.
|
||||
pub static TEST_REG: [u8; 1] = [0x85];
|
||||
|
||||
/// Count the number of trailing zero bits.
|
||||
pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc];
|
||||
|
||||
/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64
|
||||
/// and set the EFLAGS flags accordingly.
|
||||
pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e];
|
||||
|
||||
/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32
|
||||
/// and set the EFLAGS flags accordingly.
|
||||
pub static UCOMISS: [u8; 2] = [0x0f, 0x2e];
|
||||
|
||||
/// Raise invalid opcode instruction.
|
||||
pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b];
|
||||
|
||||
/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed
|
||||
/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior
|
||||
/// is controlled by MXCSR but can be overridden by EVEX.L'L in static rounding mode
|
||||
/// (AVX512VL, AVX512F).
|
||||
pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a];
|
||||
|
||||
/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended.
|
||||
pub static XOR_IMM: [u8; 1] = [0x81];
|
||||
|
||||
/// r/m{16,32,64} XOR sign-extended imm8.
|
||||
pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
|
||||
|
||||
/// r/m{16,32,64} XOR register of the same size.
|
||||
pub static XOR: [u8; 1] = [0x31];
|
||||
|
||||
/// Bitwise logical XOR of packed double-precision floating-point values.
|
||||
pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57];
|
||||
|
||||
/// Bitwise logical XOR of packed single-precision floating-point values.
|
||||
pub static XORPS: [u8; 2] = [0x0f, 0x57];
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,43 +0,0 @@
|
||||
use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder};
|
||||
|
||||
pub(crate) fn define() -> IsaRegs {
|
||||
let mut regs = IsaRegsBuilder::new();
|
||||
|
||||
let builder = RegBankBuilder::new("FloatRegs", "xmm")
|
||||
.units(16)
|
||||
.track_pressure(true);
|
||||
let float_regs = regs.add_bank(builder);
|
||||
|
||||
let builder = RegBankBuilder::new("IntRegs", "r")
|
||||
.units(16)
|
||||
.names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"])
|
||||
.track_pressure(true)
|
||||
.pinned_reg(15);
|
||||
let int_regs = regs.add_bank(builder);
|
||||
|
||||
let builder = RegBankBuilder::new("FlagRegs", "")
|
||||
.units(1)
|
||||
.names(vec!["rflags"])
|
||||
.track_pressure(false);
|
||||
let flag_reg = regs.add_bank(builder);
|
||||
|
||||
let builder = RegClassBuilder::new_toplevel("GPR", int_regs);
|
||||
let gpr = regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::new_toplevel("FPR", float_regs);
|
||||
let fpr = regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg);
|
||||
regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8);
|
||||
let gpr8 = regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4);
|
||||
regs.add_class(builder);
|
||||
|
||||
let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8);
|
||||
regs.add_class(builder);
|
||||
|
||||
regs.build()
|
||||
}
|
||||
Reference in New Issue
Block a user