Merge pull request #2539 from cfallin/x64-i128

Support for I128 operations in x64 backend.
This commit is contained in:
Chris Fallin
2021-01-14 14:45:03 -08:00
committed by GitHub
12 changed files with 3213 additions and 675 deletions

View File

@@ -138,42 +138,62 @@ impl ABIMachineSpec for X64ABIMachineSpec {
),
}
let intreg = in_int_reg(param.value_type);
let vecreg = in_vec_reg(param.value_type);
debug_assert!(intreg || vecreg);
debug_assert!(!(intreg && vecreg));
let (next_reg, candidate) = if intreg {
let candidate = match args_or_rets {
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr),
ArgsOrRets::Rets => get_intreg_for_retval_systemv(&call_conv, next_gpr, i),
};
debug_assert!(candidate
.map(|r| r.get_class() == RegClass::I64)
.unwrap_or(true));
(&mut next_gpr, candidate)
} else {
let candidate = match args_or_rets {
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg),
ArgsOrRets::Rets => get_fltreg_for_retval_systemv(&call_conv, next_vreg, i),
};
debug_assert!(candidate
.map(|r| r.get_class() == RegClass::V128)
.unwrap_or(true));
(&mut next_vreg, candidate)
};
if let Some(param) = try_fill_baldrdash_reg(call_conv, param) {
assert!(intreg);
ret.push(param);
} else if let Some(reg) = candidate {
continue;
}
// Find regclass(es) of the register(s) used to store a value of this type.
let (rcs, _) = Inst::rc_for_type(param.value_type)?;
let intreg = rcs[0] == RegClass::I64;
let num_regs = rcs.len();
assert!(num_regs <= 2);
if num_regs == 2 {
assert_eq!(rcs[0], rcs[1]);
}
let mut regs: SmallVec<[RealReg; 2]> = smallvec![];
for j in 0..num_regs {
let nextreg = if intreg {
match args_or_rets {
ArgsOrRets::Args => get_intreg_for_arg_systemv(&call_conv, next_gpr + j),
ArgsOrRets::Rets => {
get_intreg_for_retval_systemv(&call_conv, next_gpr + j, i + j)
}
}
} else {
match args_or_rets {
ArgsOrRets::Args => get_fltreg_for_arg_systemv(&call_conv, next_vreg + j),
ArgsOrRets::Rets => {
get_fltreg_for_retval_systemv(&call_conv, next_vreg + j, i + j)
}
}
};
if let Some(reg) = nextreg {
regs.push(reg.to_real_reg());
} else {
regs.clear();
break;
}
}
if regs.len() > 0 {
let regs = match num_regs {
1 => ValueRegs::one(regs[0]),
2 => ValueRegs::two(regs[0], regs[1]),
_ => panic!("More than two registers unexpected"),
};
ret.push(ABIArg::Reg(
ValueRegs::one(reg.to_real_reg()),
regs,
param.value_type,
param.extension,
param.purpose,
));
*next_reg += 1;
if intreg {
next_gpr += num_regs;
} else {
next_vreg += num_regs;
}
} else {
// Compute size. Every arg takes a minimum slot of 8 bytes. (16-byte
// stack alignment happens separately after all args.)
@@ -658,31 +678,6 @@ impl From<StackAMode> for SyntheticAmode {
}
}
/// Reports whether `ty` is one of the types this ABI code classifies as an
/// integer-register type: `I8`..`I64`, `B1`..`B64` and `R64`.
///
/// # Panics
///
/// Panics when given `types::R32`, which is not expected on x64.
fn in_int_reg(ty: types::Type) -> bool {
    // 32-bit references are rejected outright rather than classified.
    if ty == types::R32 {
        panic!("unexpected 32-bits refs on x64!");
    }
    let int_reg_types = [
        types::I8,
        types::I16,
        types::I32,
        types::I64,
        types::B1,
        types::B8,
        types::B16,
        types::B32,
        types::B64,
        types::R64,
    ];
    int_reg_types.contains(&ty)
}
/// Reports whether `ty` is classified as a vector-register type: `F32`,
/// `F64`, or any type for which `is_vector()` returns true.
fn in_vec_reg(ty: types::Type) -> bool {
    ty == types::F32 || ty == types::F64 || ty.is_vector()
}
fn get_intreg_for_arg_systemv(call_conv: &CallConv, idx: usize) -> Option<Reg> {
match call_conv {
CallConv::Fast

View File

@@ -346,23 +346,35 @@ impl PrettyPrintSized for RegMem {
/// Opcode selector for the ALU instructions built with `Inst::alu_rmi_r`.
///
/// Each variant's `Debug` output is its assembly mnemonic (shown in
/// backticks below).
#[derive(Copy, Clone, PartialEq)]
pub enum AluRmiROpcode {
/// Addition (`add`).
Add,
/// Add with carry (`adc`).
Adc,
/// Subtraction (`sub`).
Sub,
/// Subtract with borrow (`sbb`).
Sbb,
/// Bitwise AND (`and`).
And,
/// Bitwise OR (`or`).
Or,
/// Bitwise exclusive OR (`xor`).
Xor,
/// The signless, non-extending (N x N -> N, for N in {32,64}) variant.
Mul,
/// 8-bit form of And. Handled separately as we don't have full 8-bit op
/// support (we just use wider instructions). Used only with some sequences
/// with SETcc.
And8,
/// 8-bit form of Or.
Or8,
}
impl fmt::Debug for AluRmiROpcode {
/// Writes the bare mnemonic for this opcode, without any operand-size
/// suffix. The 8-bit variants print the same mnemonic as their wider
/// counterparts.
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
    fmt.write_str(match self {
        AluRmiROpcode::Add => "add",
        AluRmiROpcode::Adc => "adc",
        AluRmiROpcode::Sub => "sub",
        AluRmiROpcode::Sbb => "sbb",
        AluRmiROpcode::And | AluRmiROpcode::And8 => "and",
        AluRmiROpcode::Or | AluRmiROpcode::Or8 => "or",
        AluRmiROpcode::Xor => "xor",
        AluRmiROpcode::Mul => "imul",
    })
}
@@ -374,6 +386,16 @@ impl fmt::Display for AluRmiROpcode {
}
}
impl AluRmiROpcode {
    /// Returns `true` only for the special-cased 8-bit opcodes (`And8` and
    /// `Or8`); every other opcode yields `false`.
    pub fn is_8bit(self) -> bool {
        self == AluRmiROpcode::And8 || self == AluRmiROpcode::Or8
    }
}
#[derive(Clone, PartialEq)]
pub enum UnaryRmROpcode {
/// Bit-scan reverse.
@@ -1010,7 +1032,7 @@ impl fmt::Display for ExtMode {
}
/// These indicate the form of a scalar shift/rotate: left, signed right, unsigned right.
#[derive(Clone)]
#[derive(Clone, Copy)]
pub enum ShiftKind {
ShiftLeft,
/// Inserts zeros in the most significant bits.

View File

@@ -83,6 +83,14 @@ impl RexFlags {
self
}
/// Requests that the REX prefix always be emitted when `reg` is a hardware
/// encoding in the range 4..=7, and leaves the flags untouched otherwise.
///
/// Callers use this for byte-sized operands, where a redundant REX prefix
/// must be emitted for certain register inputs. Returns `self` so calls can
/// be chained.
#[inline(always)]
fn always_emit_if_8bit_needed(&mut self, reg: u8) -> &mut Self {
    if (4..=7).contains(&reg) {
        self.always_emit();
    }
    self
}
#[inline(always)]
fn must_clear_w(&self) -> bool {
(self.0 & 1) != 0
@@ -527,7 +535,7 @@ pub(crate) fn emit(
src,
dst: reg_g,
} => {
let rex = if *is_64 {
let mut rex = if *is_64 {
RexFlags::set_w()
} else {
RexFlags::clear_w()
@@ -581,17 +589,26 @@ pub(crate) fn emit(
}
}
} else {
let (opcode_r, opcode_m, subopcode_i) = match op {
AluRmiROpcode::Add => (0x01, 0x03, 0),
AluRmiROpcode::Sub => (0x29, 0x2B, 5),
AluRmiROpcode::And => (0x21, 0x23, 4),
AluRmiROpcode::Or => (0x09, 0x0B, 1),
AluRmiROpcode::Xor => (0x31, 0x33, 6),
let (opcode_r, opcode_m, subopcode_i, is_8bit) = match op {
AluRmiROpcode::Add => (0x01, 0x03, 0, false),
AluRmiROpcode::Adc => (0x11, 0x03, 0, false),
AluRmiROpcode::Sub => (0x29, 0x2B, 5, false),
AluRmiROpcode::Sbb => (0x19, 0x2B, 5, false),
AluRmiROpcode::And => (0x21, 0x23, 4, false),
AluRmiROpcode::Or => (0x09, 0x0B, 1, false),
AluRmiROpcode::Xor => (0x31, 0x33, 6, false),
AluRmiROpcode::And8 => (0x20, 0x22, 4, true),
AluRmiROpcode::Or8 => (0x08, 0x0A, 1, true),
AluRmiROpcode::Mul => panic!("unreachable"),
};
assert!(!(is_8bit && *is_64));
match src {
RegMemImm::Reg { reg: reg_e } => {
if is_8bit {
rex.always_emit_if_8bit_needed(int_reg_enc(*reg_e));
rex.always_emit_if_8bit_needed(int_reg_enc(reg_g.to_reg()));
}
// GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R
// duality). Do this too, so as to be able to compare generated machine
// code easily.
@@ -604,11 +621,12 @@ pub(crate) fn emit(
reg_g.to_reg(),
rex,
);
// NB: if this is ever extended to handle byte size ops, be sure to retain
// redundant REX prefixes.
}
RegMemImm::Mem { addr } => {
if is_8bit {
rex.always_emit_if_8bit_needed(int_reg_enc(reg_g.to_reg()));
}
// Here we revert to the "normal" G-E ordering.
let amode = addr.finalize(state, sink);
emit_std_reg_mem(
@@ -625,6 +643,7 @@ pub(crate) fn emit(
}
RegMemImm::Imm { simm32 } => {
assert!(!is_8bit);
let use_imm8 = low8_will_sign_extend_to_32(*simm32);
let opcode = if use_imm8 { 0x83 } else { 0x81 };
// And also here we use the "normal" G-E ordering.
@@ -685,8 +704,13 @@ pub(crate) fn emit(
}
Inst::Not { size, src } => {
let src = int_reg_enc(src.to_reg());
let (opcode, prefix, rex_flags) = match size {
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
1 => (
0xF6,
LegacyPrefixes::None,
*RexFlags::clear_w().always_emit_if_8bit_needed(src),
),
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
@@ -694,13 +718,17 @@ pub(crate) fn emit(
};
let subopcode = 2;
let src = int_reg_enc(src.to_reg());
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
}
Inst::Neg { size, src } => {
let src = int_reg_enc(src.to_reg());
let (opcode, prefix, rex_flags) = match size {
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
1 => (
0xF6,
LegacyPrefixes::None,
*RexFlags::clear_w().always_emit_if_8bit_needed(src),
),
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xF7, LegacyPrefixes::None, RexFlags::set_w()),
@@ -708,7 +736,6 @@ pub(crate) fn emit(
};
let subopcode = 3;
let src = int_reg_enc(src.to_reg());
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
}
@@ -717,7 +744,7 @@ pub(crate) fn emit(
signed,
divisor,
} => {
let (opcode, prefix, rex_flags) = match size {
let (opcode, prefix, mut rex_flags) = match size {
1 => (0xF6, LegacyPrefixes::None, RexFlags::clear_w()),
2 => (0xF7, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xF7, LegacyPrefixes::None, RexFlags::clear_w()),
@@ -732,6 +759,9 @@ pub(crate) fn emit(
match divisor {
RegMem::Reg { reg } => {
let src = int_reg_enc(*reg);
if *size == 1 {
rex_flags.always_emit_if_8bit_needed(src);
}
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, src, rex_flags)
}
RegMem::Mem { addr: src } => {
@@ -987,9 +1017,7 @@ pub(crate) fn emit(
ExtMode::BL | ExtMode::BQ => {
// A redundant REX prefix must be emitted for certain register inputs.
let enc_src = int_reg_enc(*src);
if enc_src >= 4 && enc_src <= 7 {
rex_flags.always_emit();
};
rex_flags.always_emit_if_8bit_needed(enc_src);
}
_ => {}
}
@@ -1084,9 +1112,7 @@ pub(crate) fn emit(
ExtMode::BL | ExtMode::BQ => {
// A redundant REX prefix must be emitted for certain register inputs.
let enc_src = int_reg_enc(*src);
if enc_src >= 4 && enc_src <= 7 {
rex_flags.always_emit();
};
rex_flags.always_emit_if_8bit_needed(enc_src);
}
_ => {}
}
@@ -1130,9 +1156,7 @@ pub(crate) fn emit(
let mut rex = RexFlags::clear_w();
let enc_src = int_reg_enc(*src);
if enc_src >= 4 && enc_src <= 7 {
rex.always_emit();
};
rex.always_emit_if_8bit_needed(enc_src);
// MOV r8, r/m8 is (REX.W==0) 88 /r
emit_std_reg_mem(
@@ -1215,7 +1239,11 @@ pub(crate) fn emit(
match num_bits {
None => {
let (opcode, prefix, rex_flags) = match size {
1 => (0xD2, LegacyPrefixes::None, RexFlags::clear_w()),
1 => (
0xD2,
LegacyPrefixes::None,
*RexFlags::clear_w().always_emit_if_8bit_needed(enc_dst),
),
2 => (0xD3, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xD3, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xD3, LegacyPrefixes::None, RexFlags::set_w()),
@@ -1231,7 +1259,11 @@ pub(crate) fn emit(
Some(num_bits) => {
let (opcode, prefix, rex_flags) = match size {
1 => (0xC0, LegacyPrefixes::None, RexFlags::clear_w()),
1 => (
0xC0,
LegacyPrefixes::None,
*RexFlags::clear_w().always_emit_if_8bit_needed(enc_dst),
),
2 => (0xC1, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xC1, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xC1, LegacyPrefixes::None, RexFlags::set_w()),
@@ -1330,9 +1362,7 @@ pub(crate) fn emit(
let mut rex = RexFlags::clear_w();
// Here, a redundant REX prefix changes the meaning of the instruction.
let enc_g = int_reg_enc(*reg_g);
if enc_g >= 4 && enc_g <= 7 {
rex.always_emit();
}
rex.always_emit_if_8bit_needed(enc_g);
rex
}
_ => panic!("x64::Inst::Cmp_RMI_R::emit: unreachable"),
@@ -1343,9 +1373,7 @@ pub(crate) fn emit(
if *size == 1 {
// Check whether the E register forces the use of a redundant REX.
let enc_e = int_reg_enc(*reg_e);
if enc_e >= 4 && enc_e <= 7 {
rex.always_emit();
}
rex.always_emit_if_8bit_needed(enc_e);
}
// Use the swapped operands encoding for CMP, to stay consistent with the output of
@@ -2761,9 +2789,7 @@ pub(crate) fn emit(
types::I8 => {
let mut rex_flags = RexFlags::clear_w();
let enc_src = int_reg_enc(*src);
if enc_src >= 4 && enc_src <= 7 {
rex_flags.always_emit();
};
rex_flags.always_emit_if_8bit_needed(enc_src);
(LegacyPrefixes::_F0, rex_flags, 0x0FB0)
}
types::I16 => (LegacyPrefixes::_66F0, RexFlags::clear_w(), 0x0FB1),

View File

@@ -1025,6 +1025,56 @@ fn test_x64_emit() {
"4C09FA",
"orq %r15, %rdx",
));
insns.push((
Inst::alu_rmi_r(false, AluRmiROpcode::And8, RegMemImm::reg(r15), w_rdx),
"4420FA",
"andb %r15b, %dl",
));
insns.push((
Inst::alu_rmi_r(false, AluRmiROpcode::And8, RegMemImm::reg(rax), w_rsi),
"4020C6",
"andb %al, %sil",
));
insns.push((
Inst::alu_rmi_r(false, AluRmiROpcode::And8, RegMemImm::reg(rax), w_rbx),
"20C3",
"andb %al, %bl",
));
insns.push((
Inst::alu_rmi_r(
false,
AluRmiROpcode::And8,
RegMemImm::mem(Amode::imm_reg(0, rax)),
w_rbx,
),
"2218",
"andb 0(%rax), %bl",
));
insns.push((
Inst::alu_rmi_r(false, AluRmiROpcode::Or8, RegMemImm::reg(r15), w_rdx),
"4408FA",
"orb %r15b, %dl",
));
insns.push((
Inst::alu_rmi_r(false, AluRmiROpcode::Or8, RegMemImm::reg(rax), w_rsi),
"4008C6",
"orb %al, %sil",
));
insns.push((
Inst::alu_rmi_r(false, AluRmiROpcode::Or8, RegMemImm::reg(rax), w_rbx),
"08C3",
"orb %al, %bl",
));
insns.push((
Inst::alu_rmi_r(
false,
AluRmiROpcode::Or8,
RegMemImm::mem(Amode::imm_reg(0, rax)),
w_rbx,
),
"0A18",
"orb 0(%rax), %bl",
));
insns.push((
Inst::alu_rmi_r(true, AluRmiROpcode::Xor, RegMemImm::reg(r15), w_rdx),
"4C31FA",
@@ -1193,6 +1243,16 @@ fn test_x64_emit() {
"66F7D7",
"notw %di",
));
insns.push((
Inst::not(1, Writable::from_reg(regs::rdi())),
"40F6D7",
"notb %dil",
));
insns.push((
Inst::not(1, Writable::from_reg(regs::rax())),
"F6D0",
"notb %al",
));
// ========================================================
// Neg
@@ -1216,6 +1276,16 @@ fn test_x64_emit() {
"66F7DF",
"negw %di",
));
insns.push((
Inst::neg(1, Writable::from_reg(regs::rdi())),
"40F6DF",
"negb %dil",
));
insns.push((
Inst::neg(1, Writable::from_reg(regs::rax())),
"F6D8",
"negb %al",
));
// ========================================================
// Div
@@ -1239,6 +1309,16 @@ fn test_x64_emit() {
"48F7F7",
"div %rdi",
));
insns.push((
Inst::div(1, false, RegMem::reg(regs::rax())),
"F6F0",
"div %al",
));
insns.push((
Inst::div(1, false, RegMem::reg(regs::rsi())),
"40F6F6",
"div %sil",
));
// ========================================================
// MulHi
@@ -2352,9 +2432,14 @@ fn test_x64_emit() {
));
insns.push((
Inst::shift_r(1, ShiftKind::RotateRight, None, w_rsi),
"D2CE",
"40D2CE",
"rorb %cl, %sil",
));
insns.push((
Inst::shift_r(1, ShiftKind::RotateRight, None, w_rax),
"D2C8",
"rorb %cl, %al",
));
insns.push((
Inst::shift_r(1, ShiftKind::RotateRight, Some(5), w_r15),
"41C0CF05",

View File

@@ -1243,6 +1243,14 @@ impl PrettyPrint for Inst {
(if is_64 { "q" } else { "l" }).to_string()
}
/// Picks the operand-size suffix for a mnemonic: `"b"` whenever `is_8` is
/// set (regardless of `is_64`), otherwise `"q"` for 64-bit and `"l"` for
/// 32-bit operations.
fn suffix_lqb(is_64: bool, is_8: bool) -> String {
    let suffix = if is_8 {
        "b"
    } else if is_64 {
        "q"
    } else {
        "l"
    };
    suffix.to_string()
}
fn size_lq(is_64: bool) -> u8 {
if is_64 {
8
@@ -1251,6 +1259,16 @@ impl PrettyPrint for Inst {
}
}
/// Returns the operand size in bytes: 1 whenever `is_8` is set (regardless
/// of `is_64`), otherwise 8 for 64-bit and 4 for 32-bit operations.
fn size_lqb(is_64: bool, is_8: bool) -> u8 {
    match (is_8, is_64) {
        (true, _) => 1,
        (false, true) => 8,
        (false, false) => 4,
    }
}
fn suffix_bwlq(size: u8) -> String {
match size {
1 => "b".to_string(),
@@ -1271,9 +1289,9 @@ impl PrettyPrint for Inst {
dst,
} => format!(
"{} {}, {}",
ljustify2(op.to_string(), suffix_lq(*is_64)),
src.show_rru_sized(mb_rru, size_lq(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, size_lq(*is_64)),
ljustify2(op.to_string(), suffix_lqb(*is_64, op.is_8bit())),
src.show_rru_sized(mb_rru, size_lqb(*is_64, op.is_8bit())),
show_ireg_sized(dst.to_reg(), mb_rru, size_lqb(*is_64, op.is_8bit())),
),
Inst::UnaryRmR { src, dst, op, size } => format!(
@@ -2065,6 +2083,17 @@ impl Amode {
}
}
}
/// Returns a copy of this amode with `offset` added to its displacement.
///
/// Only the `ImmReg` and `ImmRegRegShift` forms carry a `simm32`
/// displacement that can be adjusted.
///
/// The addition wraps modulo 2^32. `simm32` is held in an unsigned field, so
/// a plain `+=` would trip the debug-build overflow check whenever a
/// displacement with its top bit set (presumably a negative, sign-extended
/// immediate, going by the field name) is offset across zero, while release
/// builds would silently wrap. Wrapping explicitly gives the same
/// two's-complement result in every build profile.
///
/// # Panics
///
/// Panics if the amode is any other form.
pub(crate) fn offset(&self, offset: u32) -> Self {
    let mut ret = self.clone();
    match &mut ret {
        Amode::ImmReg { simm32, .. } | Amode::ImmRegRegShift { simm32, .. } => {
            *simm32 = simm32.wrapping_add(offset);
        }
        _ => panic!("Cannot offset amode: {:?}", self),
    }
    ret
}
}
impl RegMemImm {
@@ -2548,77 +2577,88 @@ impl MachInst for Inst {
ty: Type,
mut alloc_tmp: F,
) -> SmallVec<[Self; 4]> {
// We don't support 128-bit constants.
assert!(value <= u64::MAX as u128);
let mut ret = SmallVec::new();
let to_reg = to_regs
.only_reg()
.expect("multi-reg values not supported on x64");
if ty == types::F32 {
if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorps,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
let tmp = alloc_tmp(types::I32);
ret.push(Inst::imm(OperandSize::Size32, value as u64, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movd,
RegMem::reg(tmp.to_reg()),
OperandSize::Size32,
to_reg,
));
}
} else if ty == types::F64 {
if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorpd,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
let tmp = alloc_tmp(types::I64);
ret.push(Inst::imm(OperandSize::Size64, value as u64, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movq,
RegMem::reg(tmp.to_reg()),
OperandSize::Size64,
to_reg,
));
}
if ty == types::I128 {
ret.push(Inst::imm(
OperandSize::Size64,
value as u64,
to_regs.regs()[0],
));
ret.push(Inst::imm(
OperandSize::Size64,
(value >> 64) as u64,
to_regs.regs()[1],
));
} else {
// Must be an integer type.
debug_assert!(
ty == types::B1
|| ty == types::I8
|| ty == types::B8
|| ty == types::I16
|| ty == types::B16
|| ty == types::I32
|| ty == types::B32
|| ty == types::I64
|| ty == types::B64
|| ty == types::R32
|| ty == types::R64
);
if value == 0 {
ret.push(Inst::alu_rmi_r(
ty == types::I64,
AluRmiROpcode::Xor,
RegMemImm::reg(to_reg.to_reg()),
to_reg,
));
let to_reg = to_regs
.only_reg()
.expect("multi-reg values not supported on x64");
if ty == types::F32 {
if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorps,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
let tmp = alloc_tmp(types::I32);
ret.push(Inst::imm(OperandSize::Size32, value as u64, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movd,
RegMem::reg(tmp.to_reg()),
OperandSize::Size32,
to_reg,
));
}
} else if ty == types::F64 {
if value == 0 {
ret.push(Inst::xmm_rm_r(
SseOpcode::Xorpd,
RegMem::reg(to_reg.to_reg()),
to_reg,
));
} else {
let tmp = alloc_tmp(types::I64);
ret.push(Inst::imm(OperandSize::Size64, value as u64, tmp));
ret.push(Inst::gpr_to_xmm(
SseOpcode::Movq,
RegMem::reg(tmp.to_reg()),
OperandSize::Size64,
to_reg,
));
}
} else {
let value = value as u64;
ret.push(Inst::imm(
OperandSize::from_bytes(ty.bytes()),
value.into(),
to_reg,
));
// Must be an integer type.
debug_assert!(
ty == types::B1
|| ty == types::I8
|| ty == types::B8
|| ty == types::I16
|| ty == types::B16
|| ty == types::I32
|| ty == types::B32
|| ty == types::I64
|| ty == types::B64
|| ty == types::R32
|| ty == types::R64
);
if value == 0 {
ret.push(Inst::alu_rmi_r(
ty == types::I64,
AluRmiROpcode::Xor,
RegMemImm::reg(to_reg.to_reg()),
to_reg,
));
} else {
let value = value as u64;
ret.push(Inst::imm(
OperandSize::from_bytes(ty.bytes()),
value.into(),
to_reg,
));
}
}
}
ret

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,27 @@
test run
target x86_64
feature "experimental_x64"
; Counts the trailing zeros of the i128 formed by `iconcat v0, v1` and returns
; the count narrowed to i8. The expectations below are consistent with v0
; being the low 64 bits and v1 the high 64 bits; an all-zero input gives 128.
function %ctz(i64, i64) -> i8 {
block0(v0: i64, v1: i64):
v2 = iconcat v0, v1
v3 = ctz.i128 v2
v4 = ireduce.i8 v3
return v4
}
; run: %ctz(0x00000000_00000000, 0x00000001_00000000) == 96
; run: %ctz(0x00000000_00010000, 0x00000001_00000000) == 16
; run: %ctz(0x00000000_00010000, 0x00000000_00000000) == 16
; run: %ctz(0x00000000_00000000, 0x00000000_00000000) == 128
; Counts the leading zeros of the i128 formed by `iconcat v0, v1` and returns
; the count narrowed to i8. With bit 32 of v1 set the answer is 31 whatever
; v0 holds; with v1 zero the count is 64 plus the leading zeros of v0; an
; all-zero input gives 128.
function %clz(i64, i64) -> i8 {
block0(v0: i64, v1: i64):
v2 = iconcat v0, v1
v3 = clz.i128 v2
v4 = ireduce.i8 v3
return v4
}
; run: %clz(0x00000000_00000000, 0x00000001_00000000) == 31
; run: %clz(0x00000000_00010000, 0x00000001_00000000) == 31
; run: %clz(0x00000000_00010000, 0x00000000_00000000) == 111
; run: %clz(0x00000000_00000000, 0x00000000_00000000) == 128

View File

@@ -0,0 +1,47 @@
test run
target x86_64
feature "experimental_x64"
; Bit-reversing an all-zero i128 must give back the same all-zero value.
function %reverse_bits_zero() -> b1 {
block0:
v0 = iconst.i64 0
v1 = iconcat v0, v0
v2 = bitrev.i128 v1
v3 = icmp eq v2, v1
return v3
}
; run
; Bit-reverses an i128 whose only set bit is the lowest bit of the second
; iconcat operand, and expects the single set bit to land in the top bit of
; the first operand (0x8000_0000_0000_0000) with the second operand zero.
function %reverse_bits_one() -> b1 {
block0:
v0 = iconst.i64 0
v1 = iconst.i64 1
v2 = iconcat v0, v1
v3 = bitrev.i128 v2
v4 = iconst.i64 0x8000_0000_0000_0000
v5 = iconst.i64 0
v6 = iconcat v4, v5
v7 = icmp eq v3, v6
return v7
}
; run
; Bit-reverses an i128 with arbitrary contents. The two 64-bit halves swap
; places and each is bit-reversed: v4 is the 64-bit reversal of v1 and v5 is
; the 64-bit reversal of v0.
function %reverse_bits() -> b1 {
block0:
v0 = iconst.i64 0x06AD_8667_69EC_41BA
v1 = iconst.i64 0x6C83_D81A_6E28_83AB
v2 = iconcat v0, v1
v3 = bitrev.i128 v2
v4 = iconst.i64 0xD5C11476581BC136
v5 = iconst.i64 0x5D823796E661B560
v6 = iconcat v4, v5
v7 = icmp eq v3, v6
return v7
}
; run

View File

@@ -0,0 +1,26 @@
test compile
target x86_64
feature "experimental_x64"
; fabs.f64 on a register argument. The expected sequence loads the constant
; 9223372036854775807 (0x7fff_ffff_ffff_ffff, every bit except the top one)
; into a GPR, moves it to %xmm1, ANDs it with the input and copies the result
; to %xmm0.
function %f(f64) -> f64 {
block0(v0: f64):
v1 = fabs.f64 v0
return v1
}
; check: movabsq $$9223372036854775807, %rsi
; nextln: movq %rsi, %xmm1
; nextln: andpd %xmm0, %xmm1
; nextln: movaps %xmm1, %xmm0
; fabs.f64 on a value loaded from memory. The load is expected as a separate
; movsd ahead of the same mask-and-AND sequence used for a register input.
function %f(i64) -> f64 {
block0(v0: i64):
v1 = load.f64 v0
v2 = fabs.f64 v1
return v2
}
; check: movsd 0(%rdi), %xmm0
; nextln: movabsq $$9223372036854775807, %rsi
; nextln: movq %rsi, %xmm1
; nextln: andpd %xmm0, %xmm1
; nextln: movaps %xmm1, %xmm0

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
test compile
target x86_64
feature "experimental_x64"
; Selects between two i128 values on an i32 equality test. Judging by the
; expected operands, v0 arrives in %edi, v1 in %rsi/%rdx and v2 in %rcx/%r8;
; the select becomes one cmovzq per 64-bit half and the result is returned in
; %rax/%rdx.
function %f0(i32, i128, i128) -> i128 {
; check: pushq %rbp
; nextln: movq %rsp, %rbp
block0(v0: i32, v1: i128, v2: i128):
v3 = iconst.i32 42
v4 = icmp.i32 eq v0, v3
; nextln: movl $$42, %eax
; nextln: cmpl %eax, %edi
v5 = select.i128 v4, v1, v2
; nextln: cmovzq %rsi, %rcx
; nextln: cmovzq %rdx, %r8
return v5
; nextln: movq %rcx, %rax
; nextln: movq %r8, %rdx
; nextln: movq %rbp, %rsp
; nextln: popq %rbp
; nextln: ret
}

View File

@@ -0,0 +1,106 @@
test run
target x86_64
feature "experimental_x64"
; i128 shift left by 2 where no set bit crosses between the 64-bit halves:
; both halves go from 0x0101... to 0x0404...
function %ishl1() -> b1 {
block0:
v0 = iconst.i64 0x01010101_01010101
v1 = iconcat v0, v0
v2 = iconst.i32 2
v3 = ishl.i128 v1, v2
v4 = iconst.i64 0x04040404_04040404
v5 = iconcat v4, v4
v6 = icmp eq v3, v5
return v6
}
; run
; i128 shift left by 9 (less than 64): the first half ends in a zero byte,
; while the second half picks up the bits shifted out of the first half (its
; low byte is 0x02 rather than 0x00).
function %ishl2() -> b1 {
block0:
v0 = iconst.i64 0x01010101_01010101
v1 = iconst.i64 0x01010101_01010101
v2 = iconcat v0, v1
v3 = iconst.i32 9
v4 = ishl.i128 v2, v3
v5 = iconst.i64 0x02020202_02020200
v6 = iconst.i64 0x02020202_02020202
v7 = iconcat v5, v6
v8 = icmp eq v4, v7
return v8
}
; run
; i128 shift left by 66 (64 or more): the first half becomes zero and the
; second half is the original first half shifted left by 2; the original
; second half (all ones) is shifted out entirely.
function %ishl3() -> b1 {
block0:
v0 = iconst.i64 0x01010101_01010101
v1 = iconst.i64 0xffffffff_ffffffff
v2 = iconcat v0, v1
v3 = iconst.i32 66
v4 = ishl.i128 v2, v3
v5 = iconst.i64 0x00000000_00000000
v6 = iconst.i64 0x04040404_04040404
v7 = iconcat v5, v6
v8 = icmp eq v4, v7
return v8
}
; run
; i128 logical shift right by 2 (less than 64): the lowest bit of the second
; half moves into the top of the first half (top byte 0x40), and the second
; half is zero-filled from the top (top byte 0x00).
function %ushr1() -> b1 {
block0:
v0 = iconst.i64 0x01010101_01010101
v1 = iconst.i64 0x01010101_01010101
v2 = iconcat v0, v1
v3 = iconst.i32 2
v4 = ushr.i128 v2, v3
v5 = iconst.i64 0x40404040_40404040
v6 = iconst.i64 0x00404040_40404040
v7 = iconcat v5, v6
v8 = icmp eq v4, v7
return v8
}
; run
; i128 logical shift right by 66 (64 or more): the second half becomes zero
; and the first half is the original second half shifted right by 2.
function %ushr2() -> b1 {
block0:
v0 = iconst.i64 0x01010101_01010101
v1 = iconst.i64 0x01010101_01010101
v2 = iconcat v0, v1
v3 = iconst.i32 66
v4 = ushr.i128 v2, v3
v5 = iconst.i64 0x00404040_40404040
v6 = iconst.i64 0x00000000_00000000
v7 = iconcat v5, v6
v8 = icmp eq v4, v7
return v8
}
; run
; i128 arithmetic shift right by 2 (less than 64) of a value with its top bit
; set: the sign bit is replicated into the second half (0x81... becomes
; 0xe0...) and the lowest bit of the second half moves into the first half.
function %sshr1() -> b1 {
block0:
v0 = iconst.i64 0x01010101_01010101
v1 = iconst.i64 0x81010101_01010101
v2 = iconcat v0, v1
v3 = iconst.i32 2
v4 = sshr.i128 v2, v3
v5 = iconst.i64 0x40404040_40404040
v6 = iconst.i64 0xe0404040_40404040
v7 = iconcat v5, v6
v8 = icmp eq v4, v7
return v8
}
; run
; i128 arithmetic shift right by 66 (64 or more) of a value with its top bit
; set: the second half becomes all ones (sign fill) and the first half is the
; original second half arithmetically shifted right by 2.
function %sshr2() -> b1 {
block0:
v0 = iconst.i64 0x12345678_9abcdef0
v1 = iconst.i64 0x80101010_10101010
v2 = iconcat v0, v1
v3 = iconst.i32 66
v4 = sshr.i128 v2, v3
v5 = iconst.i64 0xe0040404_04040404
v6 = iconst.i64 0xffffffff_ffffffff
v7 = iconcat v5, v6
v8 = icmp eq v4, v7
return v8
}
; run