Cranelift: Implement 'iabs' for scalar types on x86_64 (#5527)

* Implement 'iabs' for scalar types on x86_64

* Small fix
This commit is contained in:
uint256_t
2023-01-06 14:33:12 +09:00
committed by GitHub
parent c50bdf600e
commit b00455135e
5 changed files with 84 additions and 2 deletions

View File

@@ -1129,6 +1129,20 @@
(neg Xmm (x64_psubq (imm $I64X2 0) rx))) (neg Xmm (x64_psubq (imm $I64X2 0) rx)))
(x64_blendvpd neg rx neg))) (x64_blendvpd neg rx neg)))
;; `i64` and smaller.
;; Lower scalar `iabs` for types that fit in 64 bits: negate the input, then
;; use a conditional move keyed on the sign flag to choose between the input
;; and its negation.
(rule -1 (lower (has_type (fits_in_64 ty) (iabs x)))
(let ((src Gpr x)
(neg ProducesFlags (x64_neg_paired ty src))
;; Manually extract the result register from the neg here. The cmove
;; needs it as an operand, and the cmove must be fully constructed
;; before it is handed to `with_flags_reg`; the neg's own result is
;; therefore dropped below via `produces_flags_ignore`.
(neg_result Gpr (produces_flags_get_reg neg))
;; If the neg sets the sign flag, the negated value is negative, so
;; the cmove selects the original value `src`; otherwise the negated
;; value `neg_result` is kept.
;; NOTE(review): for the minimum value of `ty`, negation presumably
;; wraps back to the same value, so the sign flag is set and the
;; (still negative) input is returned -- confirm this matches the
;; intended wrapping semantics of `iabs`.
(cmove ConsumesFlags (cmove ty (CC.S) src neg_result)))
(with_flags_reg (produces_flags_ignore neg) cmove)))
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $F32 (fabs x))) (rule (lower (has_type $F32 (fabs x)))

View File

@@ -362,6 +362,7 @@
;; Get the produced register out of a ProducesFlags. ;; Get the produced register out of a ProducesFlags.
(decl produces_flags_get_reg (ProducesFlags) Reg) (decl produces_flags_get_reg (ProducesFlags) Reg)
(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg) (rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsReg _ reg)) reg)
(rule (produces_flags_get_reg (ProducesFlags.ProducesFlagsReturnsResultWithConsumer _ reg)) reg)
;; Modify a ProducesFlags to use it only for its side-effect, ignoring ;; Modify a ProducesFlags to use it only for its side-effect, ignoring
;; its result. ;; its result.

View File

@@ -0,0 +1,67 @@
test compile precise-output
target x86_64
function %f1(i8) -> i8 {
block0(v0: i8):
v1 = iabs.i8 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; negb %al, %al
; cmovsl %edi, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f2(i16) -> i16 {
block0(v0: i16):
v1 = iabs.i16 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; negw %ax, %ax
; cmovsl %edi, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f3(i32) -> i32 {
block0(v0: i32):
v1 = iabs.i32 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; negl %eax, %eax
; cmovsl %edi, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
function %f4(i64) -> i64 {
block0(v0: i64):
v1 = iabs.i64 v0
return v1
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %rax
; negq %rax, %rax
; cmovsq %rdi, %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -4,7 +4,7 @@ target aarch64
target s390x target s390x
target riscv64 has_zbb=false target riscv64 has_zbb=false
target riscv64 has_zbb=true target riscv64 has_zbb=true
; x86_64 only supports vector iabs target x86_64
function %iabs_i8(i8) -> i8 { function %iabs_i8(i8) -> i8 {
block0(v0: i8): block0(v0: i8):

View File

@@ -293,7 +293,7 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
// https://github.com/bytecodealliance/wasmtime/issues/5474 // https://github.com/bytecodealliance/wasmtime/issues/5474
(Opcode::Srem, &[I128, I128]), (Opcode::Srem, &[I128, I128]),
// https://github.com/bytecodealliance/wasmtime/issues/5466 // https://github.com/bytecodealliance/wasmtime/issues/5466
(Opcode::Iabs), (Opcode::Iabs, &[I128]),
// https://github.com/bytecodealliance/wasmtime/issues/3370 // https://github.com/bytecodealliance/wasmtime/issues/3370
(Opcode::Smin, &[I128, I128]), (Opcode::Smin, &[I128, I128]),
// https://github.com/bytecodealliance/wasmtime/issues/3370 // https://github.com/bytecodealliance/wasmtime/issues/3370