x64: Add bmask implementation (#5148)

Afonso Bordado
2022-10-29 01:17:22 +01:00
committed by GitHub
parent 879b52825f
commit 2fb76be2e4
7 changed files with 496 additions and 33 deletions

View File

@@ -2914,15 +2914,15 @@
(decl x64_neg (Type Gpr) Gpr)
(rule (x64_neg ty src)
(let ((dst WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty))
(size OperandSize (raw_operand_size_of_type ty))
(_ Unit (emit (MInst.Neg size src dst))))
dst))
;; Helper for creating `neg` instructions whose flags are also used.
(decl x64_neg_paired (Type Gpr) ProducesFlags)
(rule (x64_neg_paired ty src)
(let ((dst WritableGpr (temp_writable_gpr))
(size OperandSize (operand_size_of_type_32_64 ty))
(size OperandSize (raw_operand_size_of_type ty))
(inst MInst (MInst.Neg size src dst)))
(ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst dst)))

View File

@@ -1172,6 +1172,50 @@
(x64_psllq (vector_all_ones)
(RegMemImm.Imm 63))))
;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl lower_bmask (Type Type ValueRegs) ValueRegs)
;; Values that fit in a register
;;
;; Use the `neg` instruction on the input, which sets the CF (carry) flag
;; to 0 if the input is zero and to 1 otherwise.
;; We then subtract the output register from itself with `sbb`, which on its
;; own always produces 0, but the borrow taken from the carry flag set by the
;; previous negate turns the result into -1 if the input was nonzero.
;;
;; neg in_reg
;; sbb out_reg, out_reg
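;;
;; Illustrative example (not an additional lowering rule): with in_reg = 5,
;; `neg` computes 0 - 5 and sets CF = 1, so `sbb out, out` evaluates
;; out - out - CF = -1, the all-ones mask. With in_reg = 0, `neg` leaves
;; CF = 0 and `sbb out, out` yields 0.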
(rule 0
(lower_bmask (fits_in_64 out_ty) (fits_in_64 in_ty) val)
(let ((reg Gpr (value_regs_get_gpr val 0))
(out ValueRegs (with_flags
(x64_neg_paired in_ty reg)
(x64_sbb_paired out_ty reg reg))))
;; Extract only the output of the sbb instruction
(value_reg (value_regs_get out 1))))
;; If the input type is I128 we `or` the two halves together and recurse to the general case.
(rule 1
(lower_bmask (fits_in_64 out_ty) $I128 val)
(let ((lo Gpr (value_regs_get_gpr val 0))
(hi Gpr (value_regs_get_gpr val 1))
(mixed Gpr (x64_or $I64 lo hi)))
(lower_bmask out_ty $I64 (value_reg mixed))))
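;; (The `or` of the two halves is zero exactly when the full I128 value is
;; zero, so the register-sized rule above then produces the correct mask.)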
;; If the output type is I128 we just duplicate the result of the I64 lowering
(rule 2
(lower_bmask $I128 in_ty val)
(let ((res ValueRegs (lower_bmask $I64 in_ty val))
(res Gpr (value_regs_get_gpr res 0)))
(value_regs res res)))
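;; (The mask is either all zeros or all ones, so the low 64-bit result can
;; simply be reused as the high half.)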
;; Entry point: call the `lower_bmask` helper that does all the processing.
(rule (lower (has_type out_ty (bmask x @ (value_type in_ty))))
(lower_bmask out_ty in_ty x))
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.

View File

@@ -467,7 +467,8 @@ fn lower_insn_to_regs(
| Opcode::TlsValue
| Opcode::SqmulRoundSat
| Opcode::Uunarrow
| Opcode::Nop => {
| Opcode::Nop
| Opcode::Bmask => {
let ty = if outputs.len() > 0 {
Some(ctx.output_ty(insn, 0))
} else {
@@ -496,8 +497,6 @@ fn lower_insn_to_regs(
unimplemented!("or-not / xor-not opcodes not implemented");
}
Opcode::Bmask => unimplemented!("Bmask not implemented"),
Opcode::Vsplit | Opcode::Vconcat => {
unimplemented!("Vector split/concat ops not implemented.");
}

View File

@@ -0,0 +1,440 @@
test compile precise-output
set enable_llvm_abi_extensions
target x86_64
function %bmask_i64_i64(i64) -> i64 {
block0(v0: i64):
v1 = bmask.i64 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negq %rcx, %rcx
; movq %rdi, %rax
; sbbq %rax, %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i64_i32(i64) -> i32 {
block0(v0: i64):
v1 = bmask.i32 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negq %rcx, %rcx
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i64_i16(i64) -> i16 {
block0(v0: i64):
v1 = bmask.i16 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negq %rcx, %rcx
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i64_i8(i64) -> i8 {
block0(v0: i64):
v1 = bmask.i8 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negq %rcx, %rcx
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i32_i64(i32) -> i64 {
block0(v0: i32):
v1 = bmask.i64 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negl %ecx, %ecx
; movq %rdi, %rax
; sbbq %rax, %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i32_i32(i32) -> i32 {
block0(v0: i32):
v1 = bmask.i32 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negl %ecx, %ecx
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i32_i16(i32) -> i16 {
block0(v0: i32):
v1 = bmask.i16 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negl %ecx, %ecx
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i32_i8(i32) -> i8 {
block0(v0: i32):
v1 = bmask.i8 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negl %ecx, %ecx
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i16_i64(i16) -> i64 {
block0(v0: i16):
v1 = bmask.i64 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negw %cx, %cx
; movq %rdi, %rax
; sbbq %rax, %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i16_i32(i16) -> i32 {
block0(v0: i16):
v1 = bmask.i32 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negw %cx, %cx
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i16_i16(i16) -> i16 {
block0(v0: i16):
v1 = bmask.i16 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negw %cx, %cx
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i16_i8(i16) -> i8 {
block0(v0: i16):
v1 = bmask.i8 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negw %cx, %cx
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i8_i64(i8) -> i64 {
block0(v0: i8):
v1 = bmask.i64 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negb %cl, %cl
; movq %rdi, %rax
; sbbq %rax, %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i8_i32(i8) -> i32 {
block0(v0: i8):
v1 = bmask.i32 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negb %cl, %cl
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i8_i16(i8) -> i16 {
block0(v0: i8):
v1 = bmask.i16 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negb %cl, %cl
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i8_i8(i8) -> i8 {
block0(v0: i8):
v1 = bmask.i8 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rcx
; negb %cl, %cl
; movq %rdi, %rax
; sbbl %eax, %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i128_i128(i128) -> i128 {
block0(v0: i128):
v1 = bmask.i128 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rdx
; orq %rdx, %rsi, %rdx
; movq %rdx, %r10
; negq %r10, %r10
; sbbq %rdx, %rdx, %rdx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i128_i64(i128) -> i64 {
block0(v0: i128):
v1 = bmask.i64 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; orq %rax, %rsi, %rax
; movq %rax, %r9
; negq %r9, %r9
; sbbq %rax, %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i128_i32(i128) -> i32 {
block0(v0: i128):
v1 = bmask.i32 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; orq %rax, %rsi, %rax
; movq %rax, %r9
; negq %r9, %r9
; sbbl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i128_i16(i128) -> i16 {
block0(v0: i128):
v1 = bmask.i16 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; orq %rax, %rsi, %rax
; movq %rax, %r9
; negq %r9, %r9
; sbbl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i128_i8(i128) -> i8 {
block0(v0: i128):
v1 = bmask.i8 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; orq %rax, %rsi, %rax
; movq %rax, %r9
; negq %r9, %r9
; sbbl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i64_i128(i64) -> i128 {
block0(v0: i64):
v1 = bmask.i128 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rdx
; negq %rdx, %rdx
; movq %rdi, %rdx
; sbbq %rdx, %rdi, %rdx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i32_i128(i32) -> i128 {
block0(v0: i32):
v1 = bmask.i128 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rdx
; negl %edx, %edx
; movq %rdi, %rdx
; sbbq %rdx, %rdi, %rdx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i16_i128(i16) -> i128 {
block0(v0: i16):
v1 = bmask.i128 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rdx
; negw %dx, %dx
; movq %rdi, %rdx
; sbbq %rdx, %rdi, %rdx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
function %bmask_i8_i128(i8) -> i128 {
block0(v0: i8):
v1 = bmask.i128 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rdx
; negb %dl, %dl
; movq %rdi, %rdx
; sbbq %rdx, %rdi, %rdx
; movq %rdx, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -1,5 +1,6 @@
test interpret
test run
target x86_64
target aarch64
target s390x
target riscv64

View File

@@ -1,5 +1,7 @@
test interpret
test run
set enable_llvm_abi_extensions
target x86_64
target aarch64
target riscv64
target s390x
@@ -11,6 +13,10 @@ block0(v0: i128):
}
; run: %bmask_i128_i128(1) == -1
; run: %bmask_i128_i128(0) == 0
; run: %bmask_i128_i128(0x00000001_00000000_00000000_00000000) == -1
; run: %bmask_i128_i128(0x00000000_00000001_00000000_00000000) == -1
; run: %bmask_i128_i128(0x00000000_00000000_00000001_00000000) == -1
; run: %bmask_i128_i128(0x00000000_00000000_00000000_00000001) == -1
function %bmask_i128_i64(i128) -> i64 {
block0(v0: i128):

View File

@@ -646,57 +646,30 @@ const OPCODE_SIGNATURES: &'static [(
(Opcode::Popcnt, &[I64], &[I64], insert_opcode),
(Opcode::Popcnt, &[I128], &[I128], insert_opcode),
// Bmask
// bmask not implemented in some backends:
// x64: https://github.com/bytecodealliance/wasmtime/issues/5106
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I8], &[I8], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I16], &[I8], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I32], &[I8], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I64], &[I8], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I128], &[I8], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I8], &[I16], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I16], &[I16], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I32], &[I16], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I64], &[I16], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I128], &[I16], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I8], &[I32], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I16], &[I32], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I32], &[I32], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I64], &[I32], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I128], &[I32], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I8], &[I64], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I16], &[I64], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I32], &[I64], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I64], &[I64], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I128], &[I64], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I8], &[I128], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I16], &[I128], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I32], &[I128], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I64], &[I128], insert_opcode),
#[cfg(not(target_arch = "x86_64"))]
(Opcode::Bmask, &[I128], &[I128], insert_opcode),
// Fadd
(Opcode::Fadd, &[F32, F32], &[F32], insert_opcode),