Adds Bswap to the Cranelift IR. Implements the Bswap instruction in the x64 and aarch64 codegen backends. Cranelift users can now write:

```
builder.ins().bswap(value)
```

to get a native byteswap instruction.

* x64: implements the 32- and 64-bit bswap instruction, following the pattern set by similar unary instructions (Neg and Not): it only operates on a dst register, but is parameterized with both a src and a dst, which are expected to be the same register. As the x64 bswap instruction only exists for 32- and 64-bit registers, the 16-bit swap is implemented as a rotate left by 8. Updated the x64 RexFlags type to support emitting for single-operand instructions like bswap.
* aarch64: Bswap gets emitted as an aarch64 rev16, rev32, or rev64 instruction as appropriate.
* s390x: Bswap was already supported in the backend; it just needed a bit of plumbing.
* For completeness, added bswap to the interpreter as well.
* Added filetests and runtests for each ISA.
* Added bswap to fuzzgen; thanks to afonso360 for the code there.
* 128-bit swaps are not yet implemented; that can be done later.
This commit is contained in:
@@ -105,6 +105,21 @@ impl RexFlags {
|
||||
(self.0 & 2) != 0
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn emit_one_op(&self, sink: &mut MachBuffer<Inst>, enc_e: u8) {
|
||||
// Register Operand coded in Opcode Byte
|
||||
// REX.R and REX.X unused
|
||||
// REX.B == 1 accesses r8-r15
|
||||
let w = if self.must_clear_w() { 0 } else { 1 };
|
||||
let r = 0;
|
||||
let x = 0;
|
||||
let b = (enc_e >> 3) & 1;
|
||||
let rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | b;
|
||||
if rex != 0x40 || self.must_always_emit() {
|
||||
sink.put1(rex);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn emit_two_op(&self, sink: &mut MachBuffer<Inst>, enc_g: u8, enc_e: u8) {
|
||||
let w = if self.must_clear_w() { 0 } else { 1 };
|
||||
|
||||
@@ -151,6 +151,11 @@
|
||||
(Setcc (cc CC)
|
||||
(dst WritableGpr))
|
||||
|
||||
;; Swaps byte order in register
|
||||
(Bswap (size OperandSize) ;; 4 or 8
|
||||
(src Gpr)
|
||||
(dst WritableGpr))
|
||||
|
||||
;; =========================================
|
||||
;; Conditional moves.
|
||||
|
||||
@@ -1959,6 +1964,16 @@
|
||||
(rule (x64_sar ty src1 src2)
|
||||
(shift_r ty (ShiftKind.ShiftRightArithmetic) src1 src2))
|
||||
|
||||
;; Helper for creating byteswap (`MInst.Bswap`) instructions.
|
||||
;; On x64, the BSWAP instruction is used for 32- and 64-bit registers;
|
||||
;; for 16-bit registers one must instead use xchg or rol/ror.
|
||||
;; The helper emits the instruction into a fresh temporary GPR and returns it.
(decl x64_bswap (Type Gpr) Gpr)
|
||||
(rule (x64_bswap ty src)
|
||||
(let ((dst WritableGpr (temp_writable_gpr))
|
||||
;; The operand size is derived from `ty` via `operand_size_of_type_32_64`.
(size OperandSize (operand_size_of_type_32_64 ty))
|
||||
(_ Unit (emit (MInst.Bswap size src dst))))
|
||||
dst))
|
||||
|
||||
;; Helper for creating `MInst.CmpRmiR` instructions.
|
||||
(decl cmp_rmi_r (OperandSize CmpOpcode GprMemImm Gpr) ProducesFlags)
|
||||
(rule (cmp_rmi_r size opcode src1 src2)
|
||||
|
||||
@@ -1115,6 +1115,21 @@ pub(crate) fn emit(
|
||||
);
|
||||
}
|
||||
|
||||
Inst::Bswap { size, src, dst } => {
|
||||
let src = allocs.next(src.to_reg());
|
||||
let dst = allocs.next(dst.to_reg().to_reg());
|
||||
debug_assert_eq!(src, dst);
|
||||
let enc_reg = int_reg_enc(dst);
|
||||
|
||||
// BSWAP reg32 is (REX.W==0) 0F C8
|
||||
// BSWAP reg64 is (REX.W==1) 0F C8
|
||||
let rex_flags = RexFlags::from(*size);
|
||||
rex_flags.emit_one_op(sink, enc_reg);
|
||||
|
||||
sink.put1(0x0F);
|
||||
sink.put1(0xC8 | (enc_reg & 7));
|
||||
}
|
||||
|
||||
Inst::Cmove {
|
||||
size,
|
||||
cc,
|
||||
|
||||
@@ -107,6 +107,13 @@ impl Inst {
|
||||
Inst::Setcc { cc, dst }
|
||||
}
|
||||
|
||||
fn bswap(size: OperandSize, dst: Writable<Reg>) -> Inst {
|
||||
debug_assert!(dst.to_reg().class() == RegClass::Int);
|
||||
let src = Gpr::new(dst.to_reg()).unwrap();
|
||||
let dst = WritableGpr::from_writable_reg(dst).unwrap();
|
||||
Inst::Bswap { size, src, dst }
|
||||
}
|
||||
|
||||
fn xmm_rm_r_imm(
|
||||
op: SseOpcode,
|
||||
src: RegMem,
|
||||
@@ -3505,6 +3512,55 @@ fn test_x64_emit() {
|
||||
insns.push((Inst::setcc(CC::LE, w_r14), "410F9EC6", "setle %r14b"));
|
||||
insns.push((Inst::setcc(CC::P, w_r9), "410F9AC1", "setp %r9b"));
|
||||
insns.push((Inst::setcc(CC::NP, w_r8), "410F9BC0", "setnp %r8b"));
|
||||
|
||||
// ========================================================
|
||||
// Bswap
|
||||
insns.push((
|
||||
Inst::bswap(OperandSize::Size64, w_rax),
|
||||
"480FC8",
|
||||
"bswapq %rax, %rax",
|
||||
));
|
||||
insns.push((
|
||||
Inst::bswap(OperandSize::Size64, w_r8),
|
||||
"490FC8",
|
||||
"bswapq %r8, %r8",
|
||||
));
|
||||
insns.push((
|
||||
Inst::bswap(OperandSize::Size32, w_rax),
|
||||
"0FC8",
|
||||
"bswapl %eax, %eax",
|
||||
));
|
||||
insns.push((
|
||||
Inst::bswap(OperandSize::Size64, w_rcx),
|
||||
"480FC9",
|
||||
"bswapq %rcx, %rcx",
|
||||
));
|
||||
insns.push((
|
||||
Inst::bswap(OperandSize::Size32, w_rcx),
|
||||
"0FC9",
|
||||
"bswapl %ecx, %ecx",
|
||||
));
|
||||
insns.push((
|
||||
Inst::bswap(OperandSize::Size64, w_r11),
|
||||
"490FCB",
|
||||
"bswapq %r11, %r11",
|
||||
));
|
||||
insns.push((
|
||||
Inst::bswap(OperandSize::Size32, w_r11),
|
||||
"410FCB",
|
||||
"bswapl %r11d, %r11d",
|
||||
));
|
||||
insns.push((
|
||||
Inst::bswap(OperandSize::Size64, w_r14),
|
||||
"490FCE",
|
||||
"bswapq %r14, %r14",
|
||||
));
|
||||
insns.push((
|
||||
Inst::bswap(OperandSize::Size32, w_r14),
|
||||
"410FCE",
|
||||
"bswapl %r14d, %r14d",
|
||||
));
|
||||
|
||||
// ========================================================
|
||||
// Cmove
|
||||
insns.push((
|
||||
|
||||
@@ -68,6 +68,7 @@ impl Inst {
|
||||
Inst::AluRmiR { .. }
|
||||
| Inst::AluRM { .. }
|
||||
| Inst::AtomicRmwSeq { .. }
|
||||
| Inst::Bswap { .. }
|
||||
| Inst::CallKnown { .. }
|
||||
| Inst::CallUnknown { .. }
|
||||
| Inst::CheckedDivOrRemSeq { .. }
|
||||
@@ -1373,6 +1374,17 @@ impl PrettyPrint for Inst {
|
||||
format!("{} {}", ljustify2("set".to_string(), cc.to_string()), dst)
|
||||
}
|
||||
|
||||
Inst::Bswap { size, src, dst } => {
|
||||
let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs);
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
|
||||
format!(
|
||||
"{} {}, {}",
|
||||
ljustify2("bswap".to_string(), suffix_bwlq(*size)),
|
||||
src,
|
||||
dst
|
||||
)
|
||||
}
|
||||
|
||||
Inst::Cmove {
|
||||
size,
|
||||
cc,
|
||||
@@ -1953,6 +1965,10 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
Inst::Setcc { dst, .. } => {
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
}
|
||||
Inst::Bswap { src, dst, .. } => {
|
||||
collector.reg_use(src.to_reg());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
}
|
||||
Inst::Cmove {
|
||||
consequent,
|
||||
alternative,
|
||||
|
||||
@@ -2065,6 +2065,19 @@
|
||||
hi32)))
|
||||
swap32))
|
||||
|
||||
;; Rules for `bswap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; The x64 bswap instruction is only for 32- or 64-bit swaps, so the
|
||||
;; 16-bit swap is implemented as a rotate-left by 8 instead.
|
||||
(rule (lower (has_type $I16 (bswap src)))
|
||||
(x64_rotl $I16 src (Imm8Reg.Imm8 8)))
|
||||
|
||||
;; 32-bit swap: lowered through the `x64_bswap` helper.
(rule (lower (has_type $I32 (bswap src)))
|
||||
(x64_bswap $I32 src))
|
||||
|
||||
;; 64-bit swap: lowered through the `x64_bswap` helper.
(rule (lower (has_type $I64 (bswap src)))
|
||||
(x64_bswap $I64 src))
|
||||
|
||||
;; Rules for `is_null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Null references are represented by the constant value `0`.
|
||||
|
||||
@@ -363,6 +363,7 @@ fn lower_insn_to_regs(
|
||||
| Opcode::Ctz
|
||||
| Opcode::Popcnt
|
||||
| Opcode::Bitrev
|
||||
| Opcode::Bswap
|
||||
| Opcode::IsNull
|
||||
| Opcode::IsInvalid
|
||||
| Opcode::Uextend
|
||||
|
||||
Reference in New Issue
Block a user