x64 backend: implement 128-bit ops and misc fixes.
This implements all of the ops on I128 that are implemented by the legacy x86 backend, and includes all that are required by at least one major use-case (cg_clif rustc backend).

The sequences are open-coded where necessary; for some of them (e.g., the bit operations), this can be somewhat complex, but these sequences have been tested carefully. This PR also includes a drive-by fix of clz/ctz for 8- and 16-bit cases where they were incorrect previously.

Also includes ridealong fixes developed while bringing up cg_clif support, because they are difficult to completely separate due to other refactors that occurred in this PR:

- fix REX prefix logic for some 8-bit instructions.

  When using an 8-bit register in 64-bit mode on x86-64, the REX prefix semantics are somewhat subtle: without the REX prefix, register numbers 4–7 correspond to the second-to-lowest byte of the first four registers (AH, CH, DH, BH), whereas with the REX prefix, these register numbers correspond to the usual encoding (SPL, BPL, SIL, DIL). We could always emit a REX byte for instructions with 8-bit cases (this is harmless even if unneeded), but this would unnecessarily inflate code size; instead, the usual approach is to emit it only for these registers.

  This logic was present in some cases but missing for some other instructions: divide, not, negate, shifts.

  Fixes #2508.

- avoid unaligned SSE loads on some f64 ops.

  The implementations of several FP ops, such as fabs/fneg, used SSE instructions. This is not a problem per se, except that load-op merging did not take *alignment* into account. Specifically, if an op on an f64 loaded from memory happened to merge that load, and the instruction into which it was merged was an SSE instruction, then the SSE instruction imposes stricter (128-bit) alignment requirements than the load.f64 did.

  This PR simply forces any instruction lowerings that could use SSE instructions to implement non-SIMD operations to take inputs in registers only, and avoid load-op merging.

  Fixes #2507.

- two bugfixes exposed by cg_clif: urem/srem.i8, select.b1.

  - urem/srem.i8: the 8-bit form of the DIV instruction on x86-64 places the remainder in AH, not RDX, different from all the other width-forms of this instruction.

  - select.b1: we were not recognizing selects of boolean values as integer-typed operations, so we were generating XMM moves instead (!).
This commit is contained in:
@@ -138,42 +138,62 @@ impl ABIMachineSpec for X64ABIMachineSpec {
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
let intreg = in_int_reg(param.value_type);
|
|
||||||
let vecreg = in_vec_reg(param.value_type);
|
|
||||||
debug_assert!(intreg || vecreg);
|
|
||||||
debug_assert!(!(intreg && vecreg));
|
|
||||||
|
|
||||||
let (next_reg, candidate) = if intreg {
|
|
||||||
let candidate = match args_or_rets {
|
|
||||||
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
|
|
||||||
ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i),
|
|
||||||
};
|
|
||||||
debug_assert!(candidate
|
|
||||||
.map(|r| r.get_class() == RegClass::I64)
|
|
||||||
.unwrap_or(true));
|
|
||||||
(&mut next_gpr, candidate)
|
|
||||||
} else {
|
|
||||||
let candidate = match args_or_rets {
|
|
||||||
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
|
|
||||||
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i),
|
|
||||||
};
|
|
||||||
debug_assert!(candidate
|
|
||||||
.map(|r| r.get_class() == RegClass::V128)
|
|
||||||
.unwrap_or(true));
|
|
||||||
(&mut next_vreg, candidate)
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
|
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
|
||||||
assert!(intreg);
|
|
||||||
ret.push(param);
|
ret.push(param);
|
||||||
} else if let Some(reg) = candidate {
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find regclass(es) of the register(s) used to store a value of this type.
|
||||||
|
let (rcs, _) = Inst::rc_for_type(param.value_type)?;
|
||||||
|
let intreg = rcs[0] == RegClass::I64;
|
||||||
|
let num_regs = rcs.len();
|
||||||
|
assert!(num_regs <= 2);
|
||||||
|
if num_regs == 2 {
|
||||||
|
assert_eq!(rcs[0], rcs[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut regs: SmallVec<[RealReg; 2]> = smallvec![];
|
||||||
|
for j in 0..num_regs {
|
||||||
|
let nextreg = if intreg {
|
||||||
|
match args_or_rets {
|
||||||
|
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr + j),
|
||||||
|
ArgsOrRets::Rets => {
|
||||||
|
get_intreg_for_retval_systemv(&call_conv, next_gpr + j, i + j)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
match args_or_rets {
|
||||||
|
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg + j),
|
||||||
|
ArgsOrRets::Rets => {
|
||||||
|
get_fltreg_for_retval_systemv(&call_conv, next_vreg + j, i + j)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if let Some(reg) = nextreg {
|
||||||
|
regs.push(reg.to_real_reg());
|
||||||
|
} else {
|
||||||
|
regs.clear();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if regs.len() > 0 {
|
||||||
|
let regs = match num_regs {
|
||||||
|
1 => ValueRegs::one(regs[0]),
|
||||||
|
2 => ValueRegs::two(regs[0], regs[1]),
|
||||||
|
_ => panic!("More than two registers unexpected"),
|
||||||
|
};
|
||||||
ret.push(ABIArg::Reg(
|
ret.push(ABIArg::Reg(
|
||||||
ValueRegs::one(reg.to_real_reg()),
|
regs,
|
||||||
param.value_type,
|
param.value_type,
|
||||||
param.extension,
|
param.extension,
|
||||||
param.purpose,
|
param.purpose,
|
||||||
));
|
));
|
||||||
*next_reg += 1;
|
if intreg {
|
||||||
|
next_gpr += num_regs;
|
||||||
|
} else {
|
||||||
|
next_vreg += num_regs;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
|
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
|
||||||
// stack alignment happens separately after all args.)
|
// stack alignment happens separately after all args.)
|
||||||
@@ -658,31 +678,6 @@ impl From<StackAMode> for SyntheticAmode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn in_int_reg(ty: types::Type) -> bool {
|
|
||||||
match ty {
|
|
||||||
types::I8
|
|
||||||
| types::I16
|
|
||||||
| types::I32
|
|
||||||
| types::I64
|
|
||||||
| types::B1
|
|
||||||
| types::B8
|
|
||||||
| types::B16
|
|
||||||
| types::B32
|
|
||||||
| types::B64
|
|
||||||
| types::R64 => true,
|
|
||||||
types::R32 => panic!("unexpected 32-bits refs on x64!"),
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn in_vec_reg(ty: types::Type) -> bool {
|
|
||||||
match ty {
|
|
||||||
types::F32 | types::F64 => true,
|
|
||||||
_ if ty.is_vector() => true,
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
|
||||||
match call_conv {
|
match call_conv {
|
||||||
CallConv::Fast
|
CallConv::Fast
|
||||||
|
|||||||
@@ -346,23 +346,35 @@ impl PrettyPrintSized for RegMem {
|
|||||||
#[derive(Copy, Clone, PartialEq)]
|
#[derive(Copy, Clone, PartialEq)]
|
||||||
pub enum AluRmiROpcode {
|
pub enum AluRmiROpcode {
|
||||||
Add,
|
Add,
|
||||||
|
Adc,
|
||||||
Sub,
|
Sub,
|
||||||
|
Sbb,
|
||||||
And,
|
And,
|
||||||
Or,
|
Or,
|
||||||
Xor,
|
Xor,
|
||||||
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
|
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
|
||||||
Mul,
|
Mul,
|
||||||
|
/// 8-bit form of And. Handled separately as we don't have full 8-bit op
|
||||||
|
/// support (we just use wider instructions). Used only with some sequences
|
||||||
|
/// with SETcc.
|
||||||
|
And8,
|
||||||
|
/// 8-bit form of Or.
|
||||||
|
Or8,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for AluRmiROpcode {
|
impl fmt::Debug for AluRmiROpcode {
|
||||||
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
|
||||||
let name = match self {
|
let name = match self {
|
||||||
AluRmiROpcode::Add => "add",
|
AluRmiROpcode::Add => "add",
|
||||||
|
AluRmiROpcode::Adc => "adc",
|
||||||
AluRmiROpcode::Sub => "sub",
|
AluRmiROpcode::Sub => "sub",
|
||||||
|
AluRmiROpcode::Sbb => "sbb",
|
||||||
AluRmiROpcode::And => "and",
|
AluRmiROpcode::And => "and",
|
||||||
AluRmiROpcode::Or => "or",
|
AluRmiROpcode::Or => "or",
|
||||||
AluRmiROpcode::Xor => "xor",
|
AluRmiROpcode::Xor => "xor",
|
||||||
AluRmiROpcode::Mul => "imul",
|
AluRmiROpcode::Mul => "imul",
|
||||||
|
AluRmiROpcode::And8 => "and",
|
||||||
|
AluRmiROpcode::Or8 => "or",
|
||||||
};
|
};
|
||||||
write!(fmt, "{}", name)
|
write!(fmt, "{}", name)
|
||||||
}
|
}
|
||||||
@@ -374,6 +386,16 @@ impl fmt::Display for AluRmiROpcode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl AluRmiROpcode {
|
||||||
|
/// Is this a special-cased 8-bit ALU op?
|
||||||
|
pub fn is_8bit(self) -> bool {
|
||||||
|
match self {
|
||||||
|
AluRmiROpcode::And8 | AluRmiROpcode::Or8 => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone, PartialEq)]
|
#[derive(Clone, PartialEq)]
|
||||||
pub enum UnaryRmROpcode {
|
pub enum UnaryRmROpcode {
|
||||||
/// Bit-scan reverse.
|
/// Bit-scan reverse.
|
||||||
@@ -1010,7 +1032,7 @@ impl fmt::Display for ExtMode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// These indicate the form of a scalar shift/rotate: left, signed right, unsigned right.
|
/// These indicate the form of a scalar shift/rotate: left, signed right, unsigned right.
|
||||||
#[derive(Clone)]
|
#[derive(Clone, Copy)]
|
||||||
pub enum ShiftKind {
|
pub enum ShiftKind {
|
||||||
ShiftLeft,
|
ShiftLeft,
|
||||||
/// Inserts zeros in the most significant bits.
|
/// Inserts zeros in the most significant bits.
|
||||||
|
|||||||
@@ -83,6 +83,14 @@ impl RexFlags {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn always_emit_if_8bit_needed(&mut self, reg: u8) -> &mut Self {
|
||||||
|
if reg >= 4 && reg <= 7 {
|
||||||
|
self.always_emit();
|
||||||
|
}
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn must_clear_w(&self) -> bool {
|
fn must_clear_w(&self) -> bool {
|
||||||
(self.0 & 1) != 0
|
(self.0 & 1) != 0
|
||||||
@@ -527,7 +535,7 @@ pub(crate) fn emit(
|
|||||||
src,
|
src,
|
||||||
dst: reg_g,
|
dst: reg_g,
|
||||||
} => {
|
} => {
|
||||||
let rex = if *is_64 {
|
let mut rex = if *is_64 {
|
||||||
RexFlags::set_w()
|
RexFlags::set_w()
|
||||||
} else {
|
} else {
|
||||||
RexFlags::clear_w()
|
RexFlags::clear_w()
|
||||||
@@ -581,17 +589,26 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let (opcode_r, opcode_m, subopcode_i) = match op {
|
let (opcode_r, opcode_m, subopcode_i, is_8bit) = match op {
|
||||||
AluRmiROpcode::Add => (0x01, 0x03, 0),
|
AluRmiROpcode::Add => (0x01, 0x03, 0, false),
|
||||||
AluRmiROpcode::Sub => (0x29, 0x2B, 5),
|
AluRmiROpcode::Adc => (0x11, 0x03, 0, false),
|
||||||
AluRmiROpcode::And => (0x21, 0x23, 4),
|
AluRmiROpcode::Sub => (0x29, 0x2B, 5, false),
|
||||||
AluRmiROpcode::Or => (0x09, 0x0B, 1),
|
AluRmiROpcode::Sbb => (0x19, 0x2B, 5, false),
|
||||||
AluRmiROpcode::Xor => (0x31, 0x33, 6),
|
AluRmiROpcode::And => (0x21, 0x23, 4, false),
|
||||||
|
AluRmiROpcode::Or => (0x09, 0x0B, 1, false),
|
||||||
|
AluRmiROpcode::Xor => (0x31, 0x33, 6, false),
|
||||||
|
AluRmiROpcode::And8 => (0x20, 0x22, 4, true),
|
||||||
|
AluRmiROpcode::Or8 => (0x08, 0x0A, 1, true),
|
||||||
AluRmiROpcode::Mul => panic!("unreachable"),
|
AluRmiROpcode::Mul => panic!("unreachable"),
|
||||||
};
|
};
|
||||||
|
assert!(!(is_8bit && *is_64));
|
||||||
|
|
||||||
match src {
|
match src {
|
||||||
RegMemImm::Reg { reg: reg_e } => {
|
RegMemImm::Reg { reg: reg_e } => {
|
||||||
|
if is_8bit {
|
||||||
|
rex.always_emit_if_8bit_needed(int_reg_enc(*reg_e));
|
||||||
|
rex.always_emit_if_8bit_needed(int_reg_enc(reg_g.to_reg()));
|
||||||
|
}
|
||||||
// GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R
|
// GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R
|
||||||
// duality). Do this too, so as to be able to compare generated machine
|
// duality). Do this too, so as to be able to compare generated machine
|
||||||
// code easily.
|
// code easily.
|
||||||
@@ -604,11 +621,12 @@ pub(crate) fn emit(
|
|||||||
reg_g.to_reg(),
|
reg_g.to_reg(),
|
||||||
rex,
|
rex,
|
||||||
);
|
);
|
||||||
// NB: if this is ever extended to handle byte size ops, be sure to retain
|
|
||||||
// redundant REX prefixes.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
RegMemImm::Mem { addr } => {
|
RegMemImm::Mem { addr } => {
|
||||||
|
if is_8bit {
|
||||||
|
rex.always_emit_if_8bit_needed(int_reg_enc(reg_g.to_reg()));
|
||||||
|
}
|
||||||
// Here we revert to the "normal" G-E ordering.
|
// Here we revert to the "normal" G-E ordering.
|
||||||
let amode = addr.finalize(state, sink);
|
let amode = addr.finalize(state, sink);
|
||||||
emit_std_reg_mem(
|
emit_std_reg_mem(
|
||||||
@@ -625,6 +643,7 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
|
|
||||||
RegMemImm::Imm { simm32 } => {
|
RegMemImm::Imm { simm32 } => {
|
||||||
|
assert!(!is_8bit);
|
||||||
let use_imm8 = low8_will_sign_extend_to_32(*simm32);
|
let use_imm8 = low8_will_sign_extend_to_32(*simm32);
|
||||||
let opcode = if use_imm8 { 0x83 } else { 0x81 };
|
let opcode = if use_imm8 { 0x83 } else { 0x81 };
|
||||||
// And also here we use the "normal" G-E ordering.
|
// And also here we use the "normal" G-E ordering.
|
||||||
@@ -685,8 +704,13 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Inst::Not { size, src } => {
|
Inst::Not { size, src } => {
|
||||||
|
let src = int_reg_enc(src.to_reg());
|
||||||
let (opcode, prefix, rex_flags) = match size {
|
let (opcode, prefix, rex_flags) = match size {
|
||||||
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
|
1 => (
|
||||||
|
0xF6,
|
||||||
|
LegacyPrefixes::None,
|
||||||
|
*RexFlags::clear_w().always_emit_if_8bit_needed(src),
|
||||||
|
),
|
||||||
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
|
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
|
||||||
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
|
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
|
||||||
8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
|
8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
|
||||||
@@ -694,13 +718,17 @@ pub(crate) fn emit(
|
|||||||
};
|
};
|
||||||
|
|
||||||
let subopcode = 2;
|
let subopcode = 2;
|
||||||
let src = int_reg_enc(src.to_reg());
|
|
||||||
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
|
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
|
||||||
}
|
}
|
||||||
|
|
||||||
Inst::Neg { size, src } => {
|
Inst::Neg { size, src } => {
|
||||||
|
let src = int_reg_enc(src.to_reg());
|
||||||
let (opcode, prefix, rex_flags) = match size {
|
let (opcode, prefix, rex_flags) = match size {
|
||||||
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
|
1 => (
|
||||||
|
0xF6,
|
||||||
|
LegacyPrefixes::None,
|
||||||
|
*RexFlags::clear_w().always_emit_if_8bit_needed(src),
|
||||||
|
),
|
||||||
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
|
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
|
||||||
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
|
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
|
||||||
8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
|
8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
|
||||||
@@ -708,7 +736,6 @@ pub(crate) fn emit(
|
|||||||
};
|
};
|
||||||
|
|
||||||
let subopcode = 3;
|
let subopcode = 3;
|
||||||
let src = int_reg_enc(src.to_reg());
|
|
||||||
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
|
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -717,7 +744,7 @@ pub(crate) fn emit(
|
|||||||
signed,
|
signed,
|
||||||
divisor,
|
divisor,
|
||||||
} => {
|
} => {
|
||||||
let (opcode, prefix, rex_flags) = match size {
|
let (opcode, prefix, mut rex_flags) = match size {
|
||||||
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
|
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
|
||||||
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
|
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
|
||||||
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
|
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
|
||||||
@@ -732,6 +759,9 @@ pub(crate) fn emit(
|
|||||||
match divisor {
|
match divisor {
|
||||||
RegMem::Reg { reg } => {
|
RegMem::Reg { reg } => {
|
||||||
let src = int_reg_enc(*reg);
|
let src = int_reg_enc(*reg);
|
||||||
|
if *size == 1 {
|
||||||
|
rex_flags.always_emit_if_8bit_needed(src);
|
||||||
|
}
|
||||||
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
|
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
|
||||||
}
|
}
|
||||||
RegMem::Mem { addr: src } => {
|
RegMem::Mem { addr: src } => {
|
||||||
@@ -987,9 +1017,7 @@ pub(crate) fn emit(
|
|||||||
ExtMode::BL | ExtMode::BQ => {
|
ExtMode::BL | ExtMode::BQ => {
|
||||||
// A redundant REX prefix must be emitted for certain register inputs.
|
// A redundant REX prefix must be emitted for certain register inputs.
|
||||||
let enc_src = int_reg_enc(*src);
|
let enc_src = int_reg_enc(*src);
|
||||||
if enc_src >= 4 && enc_src <= 7 {
|
rex_flags.always_emit_if_8bit_needed(enc_src);
|
||||||
rex_flags.always_emit();
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
@@ -1084,9 +1112,7 @@ pub(crate) fn emit(
|
|||||||
ExtMode::BL | ExtMode::BQ => {
|
ExtMode::BL | ExtMode::BQ => {
|
||||||
// A redundant REX prefix must be emitted for certain register inputs.
|
// A redundant REX prefix must be emitted for certain register inputs.
|
||||||
let enc_src = int_reg_enc(*src);
|
let enc_src = int_reg_enc(*src);
|
||||||
if enc_src >= 4 && enc_src <= 7 {
|
rex_flags.always_emit_if_8bit_needed(enc_src);
|
||||||
rex_flags.always_emit();
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
@@ -1130,9 +1156,7 @@ pub(crate) fn emit(
|
|||||||
let mut rex = RexFlags::clear_w();
|
let mut rex = RexFlags::clear_w();
|
||||||
|
|
||||||
let enc_src = int_reg_enc(*src);
|
let enc_src = int_reg_enc(*src);
|
||||||
if enc_src >= 4 && enc_src <= 7 {
|
rex.always_emit_if_8bit_needed(enc_src);
|
||||||
rex.always_emit();
|
|
||||||
};
|
|
||||||
|
|
||||||
// MOV r8, r/m8 is (REX.W==0) 88 /r
|
// MOV r8, r/m8 is (REX.W==0) 88 /r
|
||||||
emit_std_reg_mem(
|
emit_std_reg_mem(
|
||||||
@@ -1215,7 +1239,11 @@ pub(crate) fn emit(
|
|||||||
match num_bits {
|
match num_bits {
|
||||||
None => {
|
None => {
|
||||||
let (opcode, prefix, rex_flags) = match size {
|
let (opcode, prefix, rex_flags) = match size {
|
||||||
1 => (0xD2, LegacyPrefixes::None, RexFlags::clear_w()),
|
1 => (
|
||||||
|
0xD2,
|
||||||
|
LegacyPrefixes::None,
|
||||||
|
*RexFlags::clear_w().always_emit_if_8bit_needed(enc_dst),
|
||||||
|
),
|
||||||
2 => (0xD3, LegacyPrefixes::_66, RexFlags::clear_w()),
|
2 => (0xD3, LegacyPrefixes::_66, RexFlags::clear_w()),
|
||||||
4 => (0xD3, LegacyPrefixes::None, RexFlags::clear_w()),
|
4 => (0xD3, LegacyPrefixes::None, RexFlags::clear_w()),
|
||||||
8 => (0xD3, LegacyPrefixes::None, RexFlags::set_w()),
|
8 => (0xD3, LegacyPrefixes::None, RexFlags::set_w()),
|
||||||
@@ -1231,7 +1259,11 @@ pub(crate) fn emit(
|
|||||||
|
|
||||||
Some(num_bits) => {
|
Some(num_bits) => {
|
||||||
let (opcode, prefix, rex_flags) = match size {
|
let (opcode, prefix, rex_flags) = match size {
|
||||||
1 => (0xC0, LegacyPrefixes::None, RexFlags::clear_w()),
|
1 => (
|
||||||
|
0xC0,
|
||||||
|
LegacyPrefixes::None,
|
||||||
|
*RexFlags::clear_w().always_emit_if_8bit_needed(enc_dst),
|
||||||
|
),
|
||||||
2 => (0xC1, LegacyPrefixes::_66, RexFlags::clear_w()),
|
2 => (0xC1, LegacyPrefixes::_66, RexFlags::clear_w()),
|
||||||
4 => (0xC1, LegacyPrefixes::None, RexFlags::clear_w()),
|
4 => (0xC1, LegacyPrefixes::None, RexFlags::clear_w()),
|
||||||
8 => (0xC1, LegacyPrefixes::None, RexFlags::set_w()),
|
8 => (0xC1, LegacyPrefixes::None, RexFlags::set_w()),
|
||||||
@@ -1330,9 +1362,7 @@ pub(crate) fn emit(
|
|||||||
let mut rex = RexFlags::clear_w();
|
let mut rex = RexFlags::clear_w();
|
||||||
// Here, a redundant REX prefix changes the meaning of the instruction.
|
// Here, a redundant REX prefix changes the meaning of the instruction.
|
||||||
let enc_g = int_reg_enc(*reg_g);
|
let enc_g = int_reg_enc(*reg_g);
|
||||||
if enc_g >= 4 && enc_g <= 7 {
|
rex.always_emit_if_8bit_needed(enc_g);
|
||||||
rex.always_emit();
|
|
||||||
}
|
|
||||||
rex
|
rex
|
||||||
}
|
}
|
||||||
_ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
|
_ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
|
||||||
@@ -1343,9 +1373,7 @@ pub(crate) fn emit(
|
|||||||
if *size == 1 {
|
if *size == 1 {
|
||||||
// Check whether the E register forces the use of a redundant REX.
|
// Check whether the E register forces the use of a redundant REX.
|
||||||
let enc_e = int_reg_enc(*reg_e);
|
let enc_e = int_reg_enc(*reg_e);
|
||||||
if enc_e >= 4 && enc_e <= 7 {
|
rex.always_emit_if_8bit_needed(enc_e);
|
||||||
rex.always_emit();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use the swapped operands encoding for CMP, to stay consistent with the output of
|
// Use the swapped operands encoding for CMP, to stay consistent with the output of
|
||||||
@@ -2761,9 +2789,7 @@ pub(crate) fn emit(
|
|||||||
types::I8 => {
|
types::I8 => {
|
||||||
let mut rex_flags = RexFlags::clear_w();
|
let mut rex_flags = RexFlags::clear_w();
|
||||||
let enc_src = int_reg_enc(*src);
|
let enc_src = int_reg_enc(*src);
|
||||||
if enc_src >= 4 && enc_src <= 7 {
|
rex_flags.always_emit_if_8bit_needed(enc_src);
|
||||||
rex_flags.always_emit();
|
|
||||||
};
|
|
||||||
(LegacyPrefixes::_F0, rex_flags, 0x0FB0)
|
(LegacyPrefixes::_F0, rex_flags, 0x0FB0)
|
||||||
}
|
}
|
||||||
types::I16 => (LegacyPrefixes::_66F0, RexFlags::clear_w(), 0x0FB1),
|
types::I16 => (LegacyPrefixes::_66F0, RexFlags::clear_w(), 0x0FB1),
|
||||||
|
|||||||
@@ -1025,6 +1025,56 @@ fn test_x64_emit() {
|
|||||||
"4C09FA",
|
"4C09FA",
|
||||||
"orq %r15, %rdx",
|
"orq %r15, %rdx",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::alu_rmi_r(false, AluRmiROpcode::And8, RegMemImm::reg(r15), w_rdx),
|
||||||
|
"4420FA",
|
||||||
|
"andb %r15b, %dl",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::alu_rmi_r(false, AluRmiROpcode::And8, RegMemImm::reg(rax), w_rsi),
|
||||||
|
"4020C6",
|
||||||
|
"andb %al, %sil",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::alu_rmi_r(false, AluRmiROpcode::And8, RegMemImm::reg(rax), w_rbx),
|
||||||
|
"20C3",
|
||||||
|
"andb %al, %bl",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::alu_rmi_r(
|
||||||
|
false,
|
||||||
|
AluRmiROpcode::And8,
|
||||||
|
RegMemImm::mem(Amode::imm_reg(0, rax)),
|
||||||
|
w_rbx,
|
||||||
|
),
|
||||||
|
"2218",
|
||||||
|
"andb 0(%rax), %bl",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::alu_rmi_r(false, AluRmiROpcode::Or8, RegMemImm::reg(r15), w_rdx),
|
||||||
|
"4408FA",
|
||||||
|
"orb %r15b, %dl",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::alu_rmi_r(false, AluRmiROpcode::Or8, RegMemImm::reg(rax), w_rsi),
|
||||||
|
"4008C6",
|
||||||
|
"orb %al, %sil",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::alu_rmi_r(false, AluRmiROpcode::Or8, RegMemImm::reg(rax), w_rbx),
|
||||||
|
"08C3",
|
||||||
|
"orb %al, %bl",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::alu_rmi_r(
|
||||||
|
false,
|
||||||
|
AluRmiROpcode::Or8,
|
||||||
|
RegMemImm::mem(Amode::imm_reg(0, rax)),
|
||||||
|
w_rbx,
|
||||||
|
),
|
||||||
|
"0A18",
|
||||||
|
"orb 0(%rax), %bl",
|
||||||
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::alu_rmi_r(true, AluRmiROpcode::Xor, RegMemImm::reg(r15), w_rdx),
|
Inst::alu_rmi_r(true, AluRmiROpcode::Xor, RegMemImm::reg(r15), w_rdx),
|
||||||
"4C31FA",
|
"4C31FA",
|
||||||
@@ -1193,6 +1243,16 @@ fn test_x64_emit() {
|
|||||||
"66F7D7",
|
"66F7D7",
|
||||||
"notw %di",
|
"notw %di",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::not(1, Writable::from_reg(regs::rdi())),
|
||||||
|
"40F6D7",
|
||||||
|
"notb %dil",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::not(1, Writable::from_reg(regs::rax())),
|
||||||
|
"F6D0",
|
||||||
|
"notb %al",
|
||||||
|
));
|
||||||
|
|
||||||
// ========================================================
|
// ========================================================
|
||||||
// Neg
|
// Neg
|
||||||
@@ -1216,6 +1276,16 @@ fn test_x64_emit() {
|
|||||||
"66F7DF",
|
"66F7DF",
|
||||||
"negw %di",
|
"negw %di",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::neg(1, Writable::from_reg(regs::rdi())),
|
||||||
|
"40F6DF",
|
||||||
|
"negb %dil",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::neg(1, Writable::from_reg(regs::rax())),
|
||||||
|
"F6D8",
|
||||||
|
"negb %al",
|
||||||
|
));
|
||||||
|
|
||||||
// ========================================================
|
// ========================================================
|
||||||
// Div
|
// Div
|
||||||
@@ -1239,6 +1309,16 @@ fn test_x64_emit() {
|
|||||||
"48F7F7",
|
"48F7F7",
|
||||||
"div %rdi",
|
"div %rdi",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::div(1, false, RegMem::reg(regs::rax())),
|
||||||
|
"F6F0",
|
||||||
|
"div %al",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::div(1, false, RegMem::reg(regs::rsi())),
|
||||||
|
"40F6F6",
|
||||||
|
"div %sil",
|
||||||
|
));
|
||||||
|
|
||||||
// ========================================================
|
// ========================================================
|
||||||
// MulHi
|
// MulHi
|
||||||
@@ -2352,9 +2432,14 @@ fn test_x64_emit() {
|
|||||||
));
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::shift_r(1, ShiftKind::RotateRight, None, w_rsi),
|
Inst::shift_r(1, ShiftKind::RotateRight, None, w_rsi),
|
||||||
"D2CE",
|
"40D2CE",
|
||||||
"rorb %cl, %sil",
|
"rorb %cl, %sil",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::shift_r(1, ShiftKind::RotateRight, None, w_rax),
|
||||||
|
"D2C8",
|
||||||
|
"rorb %cl, %al",
|
||||||
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::shift_r(1, ShiftKind::RotateRight, Some(5), w_r15),
|
Inst::shift_r(1, ShiftKind::RotateRight, Some(5), w_r15),
|
||||||
"41C0CF05",
|
"41C0CF05",
|
||||||
|
|||||||
@@ -1243,6 +1243,14 @@ impl PrettyPrint for Inst {
|
|||||||
(if is_64 { "q" } else { "l" }).to_string()
|
(if is_64 { "q" } else { "l" }).to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn suffix_lqb(is_64: bool, is_8: bool) -> String {
|
||||||
|
match (is_64, is_8) {
|
||||||
|
(_, true) => "b".to_string(),
|
||||||
|
(true, false) => "q".to_string(),
|
||||||
|
(false, false) => "l".to_string(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn size_lq(is_64: bool) -> u8 {
|
fn size_lq(is_64: bool) -> u8 {
|
||||||
if is_64 {
|
if is_64 {
|
||||||
8
|
8
|
||||||
@@ -1251,6 +1259,16 @@ impl PrettyPrint for Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn size_lqb(is_64: bool, is_8: bool) -> u8 {
|
||||||
|
if is_8 {
|
||||||
|
1
|
||||||
|
} else if is_64 {
|
||||||
|
8
|
||||||
|
} else {
|
||||||
|
4
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn suffix_bwlq(size: u8) -> String {
|
fn suffix_bwlq(size: u8) -> String {
|
||||||
match size {
|
match size {
|
||||||
1 => "b".to_string(),
|
1 => "b".to_string(),
|
||||||
@@ -1271,9 +1289,9 @@ impl PrettyPrint for Inst {
|
|||||||
dst,
|
dst,
|
||||||
} => format!(
|
} => format!(
|
||||||
"{} {}, {}",
|
"{} {}, {}",
|
||||||
ljustify2(op.to_string(), suffix_lq(*is_64)),
|
ljustify2(op.to_string(), suffix_lqb(*is_64, op.is_8bit())),
|
||||||
src.show_rru_sized(mb_rru, size_lq(*is_64)),
|
src.show_rru_sized(mb_rru, size_lqb(*is_64, op.is_8bit())),
|
||||||
show_ireg_sized(dst.to_reg(), mb_rru, size_lq(*is_64)),
|
show_ireg_sized(dst.to_reg(), mb_rru, size_lqb(*is_64, op.is_8bit())),
|
||||||
),
|
),
|
||||||
|
|
||||||
Inst::UnaryRmR { src, dst, op, size } => format!(
|
Inst::UnaryRmR { src, dst, op, size } => format!(
|
||||||
@@ -2065,6 +2083,17 @@ impl Amode {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Offset the amode by a fixed offset.
|
||||||
|
pub(crate) fn offset(&self, offset: u32) -> Self {
|
||||||
|
let mut ret = self.clone();
|
||||||
|
match &mut ret {
|
||||||
|
&mut Amode::ImmReg { ref mut simm32, .. } => *simm32 += offset,
|
||||||
|
&mut Amode::ImmRegRegShift { ref mut simm32, .. } => *simm32 += offset,
|
||||||
|
_ => panic!("Cannot offset amode: {:?}", self),
|
||||||
|
}
|
||||||
|
ret
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RegMemImm {
|
impl RegMemImm {
|
||||||
@@ -2548,77 +2577,88 @@ impl MachInst for Inst {
|
|||||||
ty: Type,
|
ty: Type,
|
||||||
mut alloc_tmp: F,
|
mut alloc_tmp: F,
|
||||||
) -> SmallVec<[Self; 4]> {
|
) -> SmallVec<[Self; 4]> {
|
||||||
// We don't support 128-bit constants.
|
|
||||||
assert!(value <= u64::MAX as u128);
|
|
||||||
let mut ret = SmallVec::new();
|
let mut ret = SmallVec::new();
|
||||||
let to_reg = to_regs
|
if ty == types::I128 {
|
||||||
.only_reg()
|
ret.push(Inst::imm(
|
||||||
.expect("multi-reg values not supported on x64");
|
OperandSize::Size64,
|
||||||
if ty == types::F32 {
|
value as u64,
|
||||||
if value == 0 {
|
to_regs.regs()[0],
|
||||||
ret.push(Inst::xmm_rm_r(
|
));
|
||||||
SseOpcode::Xorps,
|
ret.push(Inst::imm(
|
||||||
RegMem::reg(to_reg.to_reg()),
|
OperandSize::Size64,
|
||||||
to_reg,
|
(value >> 64) as u64,
|
||||||
));
|
to_regs.regs()[1],
|
||||||
} else {
|
));
|
||||||
let tmp = alloc_tmp(types::I32);
|
|
||||||
ret.push(Inst::imm(OperandSize::Size32, value as u64, tmp));
|
|
||||||
|
|
||||||
ret.push(Inst::gpr_to_xmm(
|
|
||||||
SseOpcode::Movd,
|
|
||||||
RegMem::reg(tmp.to_reg()),
|
|
||||||
OperandSize::Size32,
|
|
||||||
to_reg,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
} else if ty == types::F64 {
|
|
||||||
if value == 0 {
|
|
||||||
ret.push(Inst::xmm_rm_r(
|
|
||||||
SseOpcode::Xorpd,
|
|
||||||
RegMem::reg(to_reg.to_reg()),
|
|
||||||
to_reg,
|
|
||||||
));
|
|
||||||
} else {
|
|
||||||
let tmp = alloc_tmp(types::I64);
|
|
||||||
ret.push(Inst::imm(OperandSize::Size64, value as u64, tmp));
|
|
||||||
|
|
||||||
ret.push(Inst::gpr_to_xmm(
|
|
||||||
SseOpcode::Movq,
|
|
||||||
RegMem::reg(tmp.to_reg()),
|
|
||||||
OperandSize::Size64,
|
|
||||||
to_reg,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// Must be an integer type.
|
let to_reg = to_regs
|
||||||
debug_assert!(
|
.only_reg()
|
||||||
ty == types::B1
|
.expect("multi-reg values not supported on x64");
|
||||||
|| ty == types::I8
|
if ty == types::F32 {
|
||||||
|| ty == types::B8
|
if value == 0 {
|
||||||
|| ty == types::I16
|
ret.push(Inst::xmm_rm_r(
|
||||||
|| ty == types::B16
|
SseOpcode::Xorps,
|
||||||
|| ty == types::I32
|
RegMem::reg(to_reg.to_reg()),
|
||||||
|| ty == types::B32
|
to_reg,
|
||||||
|| ty == types::I64
|
));
|
||||||
|| ty == types::B64
|
} else {
|
||||||
|| ty == types::R32
|
let tmp = alloc_tmp(types::I32);
|
||||||
|| ty == types::R64
|
ret.push(Inst::imm(OperandSize::Size32, value as u64, tmp));
|
||||||
);
|
|
||||||
if value == 0 {
|
ret.push(Inst::gpr_to_xmm(
|
||||||
ret.push(Inst::alu_rmi_r(
|
SseOpcode::Movd,
|
||||||
ty == types::I64,
|
RegMem::reg(tmp.to_reg()),
|
||||||
AluRmiROpcode::Xor,
|
OperandSize::Size32,
|
||||||
RegMemImm::reg(to_reg.to_reg()),
|
to_reg,
|
||||||
to_reg,
|
));
|
||||||
));
|
}
|
||||||
|
} else if ty == types::F64 {
|
||||||
|
if value == 0 {
|
||||||
|
ret.push(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Xorpd,
|
||||||
|
RegMem::reg(to_reg.to_reg()),
|
||||||
|
to_reg,
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
let tmp = alloc_tmp(types::I64);
|
||||||
|
ret.push(Inst::imm(OperandSize::Size64, value as u64, tmp));
|
||||||
|
|
||||||
|
ret.push(Inst::gpr_to_xmm(
|
||||||
|
SseOpcode::Movq,
|
||||||
|
RegMem::reg(tmp.to_reg()),
|
||||||
|
OperandSize::Size64,
|
||||||
|
to_reg,
|
||||||
|
));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
let value = value as u64;
|
// Must be an integer type.
|
||||||
ret.push(Inst::imm(
|
debug_assert!(
|
||||||
OperandSize::from_bytes(ty.bytes()),
|
ty == types::B1
|
||||||
value.into(),
|
|| ty == types::I8
|
||||||
to_reg,
|
|| ty == types::B8
|
||||||
));
|
|| ty == types::I16
|
||||||
|
|| ty == types::B16
|
||||||
|
|| ty == types::I32
|
||||||
|
|| ty == types::B32
|
||||||
|
|| ty == types::I64
|
||||||
|
|| ty == types::B64
|
||||||
|
|| ty == types::R32
|
||||||
|
|| ty == types::R64
|
||||||
|
);
|
||||||
|
if value == 0 {
|
||||||
|
ret.push(Inst::alu_rmi_r(
|
||||||
|
ty == types::I64,
|
||||||
|
AluRmiROpcode::Xor,
|
||||||
|
RegMemImm::reg(to_reg.to_reg()),
|
||||||
|
to_reg,
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
let value = value as u64;
|
||||||
|
ret.push(Inst::imm(
|
||||||
|
OperandSize::from_bytes(ty.bytes()),
|
||||||
|
value.into(),
|
||||||
|
to_reg,
|
||||||
|
));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ret
|
ret
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
27
cranelift/filetests/filetests/isa/x64/bitops-i128-run.clif
Normal file
27
cranelift/filetests/filetests/isa/x64/bitops-i128-run.clif
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
test run
|
||||||
|
target x86_64
|
||||||
|
feature "experimental_x64"
|
||||||
|
|
||||||
|
function %ctz(i64, i64) -> i8 {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = ctz.i128 v2
|
||||||
|
v4 = ireduce.i8 v3
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
; run: %ctz(0x00000000_00000000, 0x00000001_00000000) == 96
|
||||||
|
; run: %ctz(0x00000000_00010000, 0x00000001_00000000) == 16
|
||||||
|
; run: %ctz(0x00000000_00010000, 0x00000000_00000000) == 16
|
||||||
|
; run: %ctz(0x00000000_00000000, 0x00000000_00000000) == 128
|
||||||
|
|
||||||
|
function %clz(i64, i64) -> i8 {
|
||||||
|
block0(v0: i64, v1: i64):
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = clz.i128 v2
|
||||||
|
v4 = ireduce.i8 v3
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
; run: %clz(0x00000000_00000000, 0x00000001_00000000) == 31
|
||||||
|
; run: %clz(0x00000000_00010000, 0x00000001_00000000) == 31
|
||||||
|
; run: %clz(0x00000000_00010000, 0x00000000_00000000) == 111
|
||||||
|
; run: %clz(0x00000000_00000000, 0x00000000_00000000) == 128
|
||||||
47
cranelift/filetests/filetests/isa/x64/bitrev-i128-run.clif
Normal file
47
cranelift/filetests/filetests/isa/x64/bitrev-i128-run.clif
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
test run
|
||||||
|
target x86_64
|
||||||
|
feature "experimental_x64"
|
||||||
|
|
||||||
|
function %reverse_bits_zero() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0
|
||||||
|
v1 = iconcat v0, v0
|
||||||
|
v2 = bitrev.i128 v1
|
||||||
|
v3 = icmp eq v2, v1
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %reverse_bits_one() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0
|
||||||
|
v1 = iconst.i64 1
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
|
||||||
|
v3 = bitrev.i128 v2
|
||||||
|
|
||||||
|
v4 = iconst.i64 0x8000_0000_0000_0000
|
||||||
|
v5 = iconst.i64 0
|
||||||
|
v6 = iconcat v4, v5
|
||||||
|
|
||||||
|
v7 = icmp eq v3, v6
|
||||||
|
return v7
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %reverse_bits() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0x06AD_8667_69EC_41BA
|
||||||
|
v1 = iconst.i64 0x6C83_D81A_6E28_83AB
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
|
||||||
|
v3 = bitrev.i128 v2
|
||||||
|
|
||||||
|
v4 = iconst.i64 0xD5C11476581BC136
|
||||||
|
v5 = iconst.i64 0x5D823796E661B560
|
||||||
|
v6 = iconcat v4, v5
|
||||||
|
|
||||||
|
v7 = icmp eq v3, v6
|
||||||
|
return v7
|
||||||
|
}
|
||||||
|
; run
|
||||||
26
cranelift/filetests/filetests/isa/x64/floating-point.clif
Normal file
26
cranelift/filetests/filetests/isa/x64/floating-point.clif
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64
|
||||||
|
feature "experimental_x64"
|
||||||
|
|
||||||
|
function %f(f64) -> f64 {
|
||||||
|
block0(v0: f64):
|
||||||
|
v1 = fabs.f64 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; check: movabsq $$9223372036854775807, %rsi
|
||||||
|
; nextln: movq %rsi, %xmm1
|
||||||
|
; nextln: andpd %xmm0, %xmm1
|
||||||
|
; nextln: movaps %xmm1, %xmm0
|
||||||
|
|
||||||
|
|
||||||
|
function %f(i64) -> f64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = load.f64 v0
|
||||||
|
v2 = fabs.f64 v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
; check: movsd 0(%rdi), %xmm0
|
||||||
|
; nextln: movabsq $$9223372036854775807, %rsi
|
||||||
|
; nextln: movq %rsi, %xmm1
|
||||||
|
; nextln: andpd %xmm0, %xmm1
|
||||||
|
; nextln: movaps %xmm1, %xmm0
|
||||||
1082
cranelift/filetests/filetests/isa/x64/i128.clif
Normal file
1082
cranelift/filetests/filetests/isa/x64/i128.clif
Normal file
File diff suppressed because it is too large
Load Diff
29
cranelift/filetests/filetests/isa/x64/select-i128.clif
Normal file
29
cranelift/filetests/filetests/isa/x64/select-i128.clif
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
test compile
|
||||||
|
target x86_64
|
||||||
|
feature "experimental_x64"
|
||||||
|
|
||||||
|
function %f0(i32, i128, i128) -> i128 {
|
||||||
|
; check: pushq %rbp
|
||||||
|
; nextln: movq %rsp, %rbp
|
||||||
|
|
||||||
|
block0(v0: i32, v1: i128, v2: i128):
|
||||||
|
|
||||||
|
v3 = iconst.i32 42
|
||||||
|
v4 = icmp.i32 eq v0, v3
|
||||||
|
; nextln: movl $$42, %eax
|
||||||
|
; nextln: cmpl %eax, %edi
|
||||||
|
|
||||||
|
v5 = select.i128 v4, v1, v2
|
||||||
|
; nextln: cmovzq %rsi, %rcx
|
||||||
|
; nextln: cmovzq %rdx, %r8
|
||||||
|
|
||||||
|
return v5
|
||||||
|
; nextln: movq %rcx, %rax
|
||||||
|
; nextln: movq %r8, %rdx
|
||||||
|
|
||||||
|
; nextln: movq %rbp, %rsp
|
||||||
|
; nextln: popq %rbp
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
106
cranelift/filetests/filetests/isa/x64/shift-i128-run.clif
Normal file
106
cranelift/filetests/filetests/isa/x64/shift-i128-run.clif
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
test run
|
||||||
|
target x86_64
|
||||||
|
feature "experimental_x64"
|
||||||
|
|
||||||
|
function %ishl1() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0x01010101_01010101
|
||||||
|
v1 = iconcat v0, v0
|
||||||
|
v2 = iconst.i32 2
|
||||||
|
v3 = ishl.i128 v1, v2
|
||||||
|
v4 = iconst.i64 0x04040404_04040404
|
||||||
|
v5 = iconcat v4, v4
|
||||||
|
v6 = icmp eq v3, v5
|
||||||
|
return v6
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %ishl2() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0x01010101_01010101
|
||||||
|
v1 = iconst.i64 0x01010101_01010101
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = iconst.i32 9
|
||||||
|
v4 = ishl.i128 v2, v3
|
||||||
|
v5 = iconst.i64 0x02020202_02020200
|
||||||
|
v6 = iconst.i64 0x02020202_02020202
|
||||||
|
v7 = iconcat v5, v6
|
||||||
|
v8 = icmp eq v4, v7
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %ishl3() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0x01010101_01010101
|
||||||
|
v1 = iconst.i64 0xffffffff_ffffffff
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = iconst.i32 66
|
||||||
|
v4 = ishl.i128 v2, v3
|
||||||
|
v5 = iconst.i64 0x00000000_00000000
|
||||||
|
v6 = iconst.i64 0x04040404_04040404
|
||||||
|
v7 = iconcat v5, v6
|
||||||
|
v8 = icmp eq v4, v7
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %ushr1() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0x01010101_01010101
|
||||||
|
v1 = iconst.i64 0x01010101_01010101
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = iconst.i32 2
|
||||||
|
v4 = ushr.i128 v2, v3
|
||||||
|
v5 = iconst.i64 0x40404040_40404040
|
||||||
|
v6 = iconst.i64 0x00404040_40404040
|
||||||
|
v7 = iconcat v5, v6
|
||||||
|
v8 = icmp eq v4, v7
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %ushr2() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0x01010101_01010101
|
||||||
|
v1 = iconst.i64 0x01010101_01010101
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = iconst.i32 66
|
||||||
|
v4 = ushr.i128 v2, v3
|
||||||
|
v5 = iconst.i64 0x00404040_40404040
|
||||||
|
v6 = iconst.i64 0x00000000_00000000
|
||||||
|
v7 = iconcat v5, v6
|
||||||
|
v8 = icmp eq v4, v7
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %sshr1() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0x01010101_01010101
|
||||||
|
v1 = iconst.i64 0x81010101_01010101
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = iconst.i32 2
|
||||||
|
v4 = sshr.i128 v2, v3
|
||||||
|
v5 = iconst.i64 0x40404040_40404040
|
||||||
|
v6 = iconst.i64 0xe0404040_40404040
|
||||||
|
v7 = iconcat v5, v6
|
||||||
|
v8 = icmp eq v4, v7
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %sshr2() -> b1 {
|
||||||
|
block0:
|
||||||
|
v0 = iconst.i64 0x12345678_9abcdef0
|
||||||
|
v1 = iconst.i64 0x80101010_10101010
|
||||||
|
v2 = iconcat v0, v1
|
||||||
|
v3 = iconst.i32 66
|
||||||
|
v4 = sshr.i128 v2, v3
|
||||||
|
v5 = iconst.i64 0xe0040404_04040404
|
||||||
|
v6 = iconst.i64 0xffffffff_ffffffff
|
||||||
|
v7 = iconcat v5, v6
|
||||||
|
v8 = icmp eq v4, v7
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
Reference in New Issue
Block a user