x64: Add bmask implementation (#5148)

This commit is contained in:
Afonso Bordado
2022-10-29 01:17:22 +01:00
committed by GitHub
parent 879b52825f
commit 2fb76be2e4
7 changed files with 496 additions and 33 deletions

View File

@@ -2914,15 +2914,15 @@
;; Helper for creating `neg` instructions: emits an `MInst.Neg` of `src` into a
;; temporary writable register and returns that register.
(decl x64_neg (Type Gpr) Gpr)
(rule (x64_neg ty src)
(let ((dst WritableGpr (temp_writable_gpr))
;; Operand size of the negation, derived from `ty`.
(size OperandSize (operand_size_of_type_32_64 ty))
(size OperandSize (raw_operand_size_of_type ty))
;; Emit the instruction immediately; the `_` binding exists only for the
;; side effect.
(_ Unit (emit (MInst.Neg size src dst))))
dst))
;; Helper for creating `neg` instructions whose flags are also used.
;;
;; No `emit` happens in this rule: it only constructs the `MInst.Neg` and
;; returns it, together with its destination register, wrapped in a
;; `ProducesFlags` value so it can be combined with a flags consumer
;; (e.g. through `with_flags`).
(decl x64_neg_paired (Type Gpr) ProducesFlags)
(rule (x64_neg_paired ty src)
(let ((dst WritableGpr (temp_writable_gpr))
;; Operand size of the negation, derived from `ty`.
(size OperandSize (operand_size_of_type_32_64 ty))
(size OperandSize (raw_operand_size_of_type ty))
;; Build the instruction without emitting it.
(inst MInst (MInst.Neg size src dst)))
(ProducesFlags.ProducesFlagsReturnsResultWithConsumer inst dst)))

View File

@@ -1172,6 +1172,50 @@
(x64_psllq (vector_all_ones)
(RegMemImm.Imm 63))))
;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl lower_bmask (Type Type ValueRegs) ValueRegs)

;; Input and output both fit in a single 64-bit register.
;;
;; `neg` leaves the CF (carry) flag clear when its operand is zero and set
;; otherwise. A following `sbb` of a register with itself computes
;; `r - r - CF`, which is 0 when the input was zero and -1 (all ones) when it
;; was nonzero:
;;
;;     neg in_reg
;;     sbb out_reg, out_reg
(rule 0
      (lower_bmask (fits_in_64 out_ty) (fits_in_64 in_ty) val)
      (let ((src Gpr (value_regs_get_gpr val 0))
            (flag_pair ValueRegs (with_flags
                                   (x64_neg_paired in_ty src)
                                   (x64_sbb_paired out_ty src src))))
        ;; Keep only the output of the `sbb` instruction (register 1 of the pair).
        (value_reg (value_regs_get flag_pair 1))))
;; An I128 input is nonzero exactly when at least one of its halves is nonzero,
;; so `or` the two registers together and lower the combined value as an I64
;; input through the general case.
(rule 1
      (lower_bmask (fits_in_64 out_ty) $I128 val)
      (let ((low Gpr (value_regs_get_gpr val 0))
            (high Gpr (value_regs_get_gpr val 1))
            (combined Gpr (x64_or $I64 low high)))
        (lower_bmask out_ty $I64 (value_reg combined))))
;; An I128 output is the result of the I64 lowering replicated into both halves.
(rule 2
      (lower_bmask $I128 in_ty val)
      (let ((narrow ValueRegs (lower_bmask $I64 in_ty val))
            (mask Gpr (value_regs_get_gpr narrow 0)))
        (value_regs mask mask)))
;; Entry point for `bmask`: hand off to `lower_bmask`, which does all the
;; processing.
(rule (lower (has_type out_ty (bmask src @ (value_type in_ty))))
      (lower_bmask out_ty in_ty src))
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; `i64` and smaller.

View File

@@ -467,7 +467,8 @@ fn lower_insn_to_regs(
| Opcode::TlsValue
| Opcode::SqmulRoundSat
| Opcode::Uunarrow
| Opcode::Nop => {
| Opcode::Nop
| Opcode::Bmask => {
let ty = if outputs.len() > 0 {
Some(ctx.output_ty(insn, 0))
} else {
@@ -496,8 +497,6 @@ fn lower_insn_to_regs(
unimplemented!("or-not / xor-not opcodes not implemented");
}
Opcode::Bmask => unimplemented!("Bmask not implemented"),
Opcode::Vsplit | Opcode::Vconcat => {
unimplemented!("Vector split/concat ops not implemented.");
}