x64: Implement rotl and rotr for small integers

This commit is contained in:
bjorn3
2020-08-23 10:49:44 +02:00
committed by Benjamin Bouvier
parent 4251a950ba
commit 067255ef45
4 changed files with 105 additions and 78 deletions

View File

@@ -1133,7 +1133,7 @@ pub(crate) fn emit(
} }
Inst::Shift_R { Inst::Shift_R {
is_64, size,
kind, kind,
num_bits, num_bits,
dst, dst,
@@ -1147,25 +1147,39 @@ pub(crate) fn emit(
ShiftKind::ShiftRightArithmetic => 7, ShiftKind::ShiftRightArithmetic => 7,
}; };
let rex = if *is_64 {
RexFlags::set_w()
} else {
RexFlags::clear_w()
};
match num_bits { match num_bits {
None => { None => {
let (opcode, prefix, rex_flags) = match size {
1 => (0xD2, LegacyPrefixes::None, RexFlags::clear_w()),
2 => (0xD3, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xD3, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xD3, LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!("{}", size),
};
// SHL/SHR/SAR %cl, reg8 is (REX.W==0) D2 /subopcode
// SHL/SHR/SAR %cl, reg16 is 66 (REX.W==0) D3 /subopcode
// SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode // SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
emit_std_enc_enc(sink, LegacyPrefixes::None, 0xD3, 1, subopcode, enc_dst, rex); emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
} }
Some(num_bits) => { Some(num_bits) => {
let (opcode, prefix, rex_flags) = match size {
1 => (0xC0, LegacyPrefixes::None, RexFlags::clear_w()),
2 => (0xC1, LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (0xC1, LegacyPrefixes::None, RexFlags::clear_w()),
8 => (0xC1, LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!("{}", size),
};
// SHL/SHR/SAR $ib, reg8 is (REX.W==0) C0 /subopcode ib
// SHL/SHR/SAR $ib, reg16 is 66 (REX.W==0) C1 /subopcode ib
// SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib // SHL/SHR/SAR $ib, reg32 is (REX.W==0) C1 /subopcode ib
// SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib // SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
// When the shift amount is 1, there's an even shorter encoding, but we don't // When the shift amount is 1, there's an even shorter encoding, but we don't
// bother with that nicety here. // bother with that nicety here.
emit_std_enc_enc(sink, LegacyPrefixes::None, 0xC1, 1, subopcode, enc_dst, rex); emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_dst, rex_flags);
sink.put1(*num_bits); sink.put1(*num_bits);
} }
} }
@@ -2054,12 +2068,7 @@ pub(crate) fn emit(
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
// tmp_gpr1 := src >> 1 // tmp_gpr1 := src >> 1
let inst = Inst::shift_r( let inst = Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(1), *tmp_gpr1);
/*is_64*/ true,
ShiftKind::ShiftRightLogical,
Some(1),
*tmp_gpr1,
);
inst.emit(sink, flags, state); inst.emit(sink, flags, state);
let inst = Inst::gen_move(*tmp_gpr2, src.to_reg(), types::I64); let inst = Inst::gen_move(*tmp_gpr2, src.to_reg(), types::I64);

View File

@@ -2365,130 +2365,150 @@ fn test_x64_emit() {
// ======================================================== // ========================================================
// Shift_R // Shift_R
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftLeft, None, w_rdi), Inst::shift_r(4, ShiftKind::ShiftLeft, None, w_rdi),
"D3E7", "D3E7",
"shll %cl, %edi", "shll %cl, %edi",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftLeft, None, w_r12), Inst::shift_r(4, ShiftKind::ShiftLeft, None, w_r12),
"41D3E4", "41D3E4",
"shll %cl, %r12d", "shll %cl, %r12d",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftLeft, Some(2), w_r8), Inst::shift_r(4, ShiftKind::ShiftLeft, Some(2), w_r8),
"41C1E002", "41C1E002",
"shll $2, %r8d", "shll $2, %r8d",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftLeft, Some(31), w_r13), Inst::shift_r(4, ShiftKind::ShiftLeft, Some(31), w_r13),
"41C1E51F", "41C1E51F",
"shll $31, %r13d", "shll $31, %r13d",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, None, w_r13), Inst::shift_r(8, ShiftKind::ShiftLeft, None, w_r13),
"49D3E5", "49D3E5",
"shlq %cl, %r13", "shlq %cl, %r13",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, None, w_rdi), Inst::shift_r(8, ShiftKind::ShiftLeft, None, w_rdi),
"48D3E7", "48D3E7",
"shlq %cl, %rdi", "shlq %cl, %rdi",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, Some(2), w_r8), Inst::shift_r(8, ShiftKind::ShiftLeft, Some(2), w_r8),
"49C1E002", "49C1E002",
"shlq $2, %r8", "shlq $2, %r8",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, Some(3), w_rbx), Inst::shift_r(8, ShiftKind::ShiftLeft, Some(3), w_rbx),
"48C1E303", "48C1E303",
"shlq $3, %rbx", "shlq $3, %rbx",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftLeft, Some(63), w_r13), Inst::shift_r(8, ShiftKind::ShiftLeft, Some(63), w_r13),
"49C1E53F", "49C1E53F",
"shlq $63, %r13", "shlq $63, %r13",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightLogical, None, w_rdi), Inst::shift_r(4, ShiftKind::ShiftRightLogical, None, w_rdi),
"D3EF", "D3EF",
"shrl %cl, %edi", "shrl %cl, %edi",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightLogical, Some(2), w_r8), Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(2), w_r8),
"41C1E802", "41C1E802",
"shrl $2, %r8d", "shrl $2, %r8d",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightLogical, Some(31), w_r13), Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(31), w_r13),
"41C1ED1F", "41C1ED1F",
"shrl $31, %r13d", "shrl $31, %r13d",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightLogical, None, w_rdi), Inst::shift_r(8, ShiftKind::ShiftRightLogical, None, w_rdi),
"48D3EF", "48D3EF",
"shrq %cl, %rdi", "shrq %cl, %rdi",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightLogical, Some(2), w_r8), Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(2), w_r8),
"49C1E802", "49C1E802",
"shrq $2, %r8", "shrq $2, %r8",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightLogical, Some(63), w_r13), Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(63), w_r13),
"49C1ED3F", "49C1ED3F",
"shrq $63, %r13", "shrq $63, %r13",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightArithmetic, None, w_rdi), Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, None, w_rdi),
"D3FF", "D3FF",
"sarl %cl, %edi", "sarl %cl, %edi",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightArithmetic, Some(2), w_r8), Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
"41C1F802", "41C1F802",
"sarl $2, %r8d", "sarl $2, %r8d",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::ShiftRightArithmetic, Some(31), w_r13), Inst::shift_r(4, ShiftKind::ShiftRightArithmetic, Some(31), w_r13),
"41C1FD1F", "41C1FD1F",
"sarl $31, %r13d", "sarl $31, %r13d",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightArithmetic, None, w_rdi), Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, None, w_rdi),
"48D3FF", "48D3FF",
"sarq %cl, %rdi", "sarq %cl, %rdi",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightArithmetic, Some(2), w_r8), Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, Some(2), w_r8),
"49C1F802", "49C1F802",
"sarq $2, %r8", "sarq $2, %r8",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::ShiftRightArithmetic, Some(63), w_r13), Inst::shift_r(8, ShiftKind::ShiftRightArithmetic, Some(63), w_r13),
"49C1FD3F", "49C1FD3F",
"sarq $63, %r13", "sarq $63, %r13",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::RotateLeft, None, w_r8), Inst::shift_r(8, ShiftKind::RotateLeft, None, w_r8),
"49D3C0", "49D3C0",
"rolq %cl, %r8", "rolq %cl, %r8",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::RotateLeft, Some(3), w_r9), Inst::shift_r(4, ShiftKind::RotateLeft, Some(3), w_r9),
"41C1C103", "41C1C103",
"roll $3, %r9d", "roll $3, %r9d",
)); ));
insns.push(( insns.push((
Inst::shift_r(false, ShiftKind::RotateRight, None, w_rsi), Inst::shift_r(4, ShiftKind::RotateRight, None, w_rsi),
"D3CE", "D3CE",
"rorl %cl, %esi", "rorl %cl, %esi",
)); ));
insns.push(( insns.push((
Inst::shift_r(true, ShiftKind::RotateRight, Some(5), w_r15), Inst::shift_r(8, ShiftKind::RotateRight, Some(5), w_r15),
"49C1CF05", "49C1CF05",
"rorq $5, %r15", "rorq $5, %r15",
)); ));
insns.push((
Inst::shift_r(1, ShiftKind::RotateRight, None, w_rsi),
"D2CE",
"rorb %cl, %sil",
));
insns.push((
Inst::shift_r(1, ShiftKind::RotateRight, Some(5), w_r15),
"41C0CF05",
"rorb $5, %r15b",
));
insns.push((
Inst::shift_r(2, ShiftKind::RotateRight, None, w_rsi),
"66D3CE",
"rorw %cl, %si",
));
insns.push((
Inst::shift_r(2, ShiftKind::RotateRight, Some(5), w_r15),
"6641C1CF05",
"rorw $5, %r15w",
));
// ======================================================== // ========================================================
// CmpRMIR // CmpRMIR

View File

@@ -166,9 +166,9 @@ pub enum Inst {
srcloc: Option<SourceLoc>, srcloc: Option<SourceLoc>,
}, },
/// Arithmetic shifts: (shl shr sar) (l q) imm reg. /// Arithmetic shifts: (shl shr sar) (b w l q) imm reg.
Shift_R { Shift_R {
is_64: bool, size: u8, // 1, 2, 4 or 8
kind: ShiftKind, kind: ShiftKind,
/// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl". /// shift count: Some(0 .. #bits-in-type - 1), or None to mean "%cl".
num_bits: Option<u8>, num_bits: Option<u8>,
@@ -892,19 +892,20 @@ impl Inst {
} }
pub(crate) fn shift_r( pub(crate) fn shift_r(
is_64: bool, size: u8,
kind: ShiftKind, kind: ShiftKind,
num_bits: Option<u8>, num_bits: Option<u8>,
dst: Writable<Reg>, dst: Writable<Reg>,
) -> Inst { ) -> Inst {
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
debug_assert!(if let Some(num_bits) = num_bits { debug_assert!(if let Some(num_bits) = num_bits {
num_bits < if is_64 { 64 } else { 32 } num_bits < size * 8
} else { } else {
true true
}); });
debug_assert!(dst.to_reg().get_class() == RegClass::I64); debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Shift_R { Inst::Shift_R {
is_64, size,
kind, kind,
num_bits, num_bits,
dst, dst,
@@ -1511,22 +1512,22 @@ impl ShowWithRRU for Inst {
), ),
Inst::Shift_R { Inst::Shift_R {
is_64, size,
kind, kind,
num_bits, num_bits,
dst, dst,
} => match num_bits { } => match num_bits {
None => format!( None => format!(
"{} %cl, {}", "{} %cl, {}",
ljustify2(kind.to_string(), suffixLQ(*is_64)), ljustify2(kind.to_string(), suffixBWLQ(*size)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) show_ireg_sized(dst.to_reg(), mb_rru, *size)
), ),
Some(num_bits) => format!( Some(num_bits) => format!(
"{} ${}, {}", "{} ${}, {}",
ljustify2(kind.to_string(), suffixLQ(*is_64)), ljustify2(kind.to_string(), suffixBWLQ(*size)),
num_bits, num_bits,
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64)) show_ireg_sized(dst.to_reg(), mb_rru, *size)
), ),
}, },

View File

@@ -634,15 +634,23 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let dst_ty = ctx.output_ty(insn, 0); let dst_ty = ctx.output_ty(insn, 0);
debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty); debug_assert_eq!(ctx.input_ty(insn, 0), dst_ty);
let lhs = match dst_ty { let (size, lhs) = match dst_ty {
types::I8 | types::I16 => match op { types::I8 | types::I16 => match op {
Opcode::Ishl => input_to_reg(ctx, inputs[0]), Opcode::Ishl => (4, input_to_reg(ctx, inputs[0])),
Opcode::Ushr => extend_input_to_reg(ctx, inputs[0], ExtSpec::ZeroExtendTo32), Opcode::Ushr => (
Opcode::Sshr => extend_input_to_reg(ctx, inputs[0], ExtSpec::SignExtendTo32), 4,
Opcode::Rotl | Opcode::Rotr => unimplemented!("rotl/rotr.i8/i16"), extend_input_to_reg(ctx, inputs[0], ExtSpec::ZeroExtendTo32),
),
Opcode::Sshr => (
4,
extend_input_to_reg(ctx, inputs[0], ExtSpec::SignExtendTo32),
),
Opcode::Rotl | Opcode::Rotr => {
(dst_ty.bytes() as u8, input_to_reg(ctx, inputs[0]))
}
_ => unreachable!(), _ => unreachable!(),
}, },
types::I32 | types::I64 => input_to_reg(ctx, inputs[0]), types::I32 | types::I64 => (dst_ty.bytes() as u8, input_to_reg(ctx, inputs[0])),
_ => unreachable!("{}", dst_ty), _ => unreachable!("{}", dst_ty),
}; };
@@ -669,13 +677,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => unreachable!(), _ => unreachable!(),
}; };
let is_64 = dst_ty == types::I64;
let w_rcx = Writable::from_reg(regs::rcx()); let w_rcx = Writable::from_reg(regs::rcx());
ctx.emit(Inst::mov_r_r(true, lhs, dst)); ctx.emit(Inst::mov_r_r(true, lhs, dst));
if count.is_none() { if count.is_none() {
ctx.emit(Inst::mov_r_r(true, rhs.unwrap(), w_rcx)); ctx.emit(Inst::mov_r_r(true, rhs.unwrap(), w_rcx));
} }
ctx.emit(Inst::shift_r(is_64, shift_kind, count, dst)); ctx.emit(Inst::shift_r(size, shift_kind, count, dst));
} }
Opcode::Ineg => { Opcode::Ineg => {
@@ -828,7 +835,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// shr $1, tmp1 // shr $1, tmp1
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(
is_64, 8,
ShiftKind::ShiftRightLogical, ShiftKind::ShiftRightLogical,
Some(1), Some(1),
tmp1, tmp1,
@@ -858,7 +865,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// shr $1, tmp1 // shr $1, tmp1
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(
is_64, 8,
ShiftKind::ShiftRightLogical, ShiftKind::ShiftRightLogical,
Some(1), Some(1),
tmp1, tmp1,
@@ -882,7 +889,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// shr $1, tmp1 // shr $1, tmp1
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(
is_64, 8,
ShiftKind::ShiftRightLogical, ShiftKind::ShiftRightLogical,
Some(1), Some(1),
tmp1, tmp1,
@@ -908,12 +915,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None)); ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None));
// shr $4, dst // shr $4, dst
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(8, ShiftKind::ShiftRightLogical, Some(4), dst));
is_64,
ShiftKind::ShiftRightLogical,
Some(4),
dst,
));
// add tmp2, dst // add tmp2, dst
ctx.emit(Inst::alu_rmi_r( ctx.emit(Inst::alu_rmi_r(
@@ -947,7 +949,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// shr $56, dst // shr $56, dst
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(
is_64, 8,
ShiftKind::ShiftRightLogical, ShiftKind::ShiftRightLogical,
Some(56), Some(56),
dst, dst,
@@ -964,7 +966,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// shr $1, tmp1 // shr $1, tmp1
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(
is_64, 4,
ShiftKind::ShiftRightLogical, ShiftKind::ShiftRightLogical,
Some(1), Some(1),
tmp1, tmp1,
@@ -991,7 +993,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// shr $1, tmp1 // shr $1, tmp1
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(
is_64, 4,
ShiftKind::ShiftRightLogical, ShiftKind::ShiftRightLogical,
Some(1), Some(1),
tmp1, tmp1,
@@ -1015,7 +1017,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// shr $1, tmp1 // shr $1, tmp1
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(
is_64, 4,
ShiftKind::ShiftRightLogical, ShiftKind::ShiftRightLogical,
Some(1), Some(1),
tmp1, tmp1,
@@ -1041,12 +1043,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None)); ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None));
// shr $4, dst // shr $4, dst
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(4, ShiftKind::ShiftRightLogical, Some(4), dst));
is_64,
ShiftKind::ShiftRightLogical,
Some(4),
dst,
));
// add tmp2, dst // add tmp2, dst
ctx.emit(Inst::alu_rmi_r( ctx.emit(Inst::alu_rmi_r(
@@ -1074,7 +1071,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// shr $24, dst // shr $24, dst
ctx.emit(Inst::shift_r( ctx.emit(Inst::shift_r(
is_64, 4,
ShiftKind::ShiftRightLogical, ShiftKind::ShiftRightLogical,
Some(24), Some(24),
dst, dst,