x64: Peephole optimization for x < 0 (#4625)

https://github.com/bytecodealliance/wasmtime/pull/4625

Fixes #4607
This commit is contained in:
Trevor Elliott
2022-08-09 09:45:53 -07:00
committed by GitHub
parent a36a52a017
commit ed7dfd3925
3 changed files with 224 additions and 0 deletions

View File

@@ -1501,6 +1501,38 @@
;; `icmp` whose first operand has type `$I128`: build the comparison with
;; `emit_cmp` and pass it through `lower_icmp_bool`.
(rule (lower (icmp cc
                   a @ (value_type $I128)
                   b))
      (lower_icmp_bool (emit_cmp cc a b)))
;; Signed 64-bit `x < 0`: the answer is exactly the sign bit of `x`, so a
;; logical right shift by 63 leaves it in bit 0 with no compare needed.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedLessThan)
                             x @ (value_type $I64)
                             (u64_from_iconst 0))))
      (x64_shr $I64 x (Imm8Reg.Imm8 63)))
;; Signed 64-bit `0 > x` (the constant is the first operand): same predicate
;; as `x < 0`, so it is again the sign bit shifted down to bit 0.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedGreaterThan)
                             (u64_from_iconst 0)
                             x @ (value_type $I64))))
      (x64_shr $I64 x (Imm8Reg.Imm8 63)))
;; Signed 64-bit `0 <= x` (the constant is the first operand): true when the
;; sign bit is clear, so complement `x` first and then shift bit 63 down to
;; bit 0.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedLessThanOrEqual)
                             (u64_from_iconst 0)
                             x @ (value_type $I64))))
      (x64_shr $I64
               (x64_not $I64 x)
               (Imm8Reg.Imm8 63)))
;; Signed 64-bit `x >= 0`: true when the sign bit is clear, so complement `x`
;; and shift the (now inverted) bit 63 down to bit 0.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedGreaterThanOrEqual)
                             x @ (value_type $I64)
                             (u64_from_iconst 0))))
      (x64_shr $I64
               (x64_not $I64 x)
               (Imm8Reg.Imm8 63)))
;; Signed 32-bit `x < 0`: the sign bit is bit 31, so a 32-bit logical right
;; shift by 31 leaves the answer in bit 0.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedLessThan)
                             x @ (value_type $I32)
                             (u64_from_iconst 0))))
      (x64_shr $I32 x (Imm8Reg.Imm8 31)))
;; Signed 32-bit `0 > x` (the constant is the first operand): same predicate
;; as `x < 0`, i.e. bit 31 shifted down to bit 0.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedGreaterThan)
                             (u64_from_iconst 0)
                             x @ (value_type $I32))))
      (x64_shr $I32 x (Imm8Reg.Imm8 31)))
;; Signed 32-bit `0 <= x` (the constant is the first operand): true when bit
;; 31 is clear, so complement `x` and shift bit 31 down to bit 0.
;;
;; NOTE(review): the `not` is typed `$I64` while the shift is typed `$I32`.
;; The 32-bit shift only looks at bit 31, so the result appears unaffected,
;; but `$I32` would be the consistent choice -- confirm before changing, since
;; it would alter the emitted instruction.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedLessThanOrEqual)
                             (u64_from_iconst 0)
                             x @ (value_type $I32))))
      (x64_shr $I32
               (x64_not $I64 x)
               (Imm8Reg.Imm8 31)))
;; Signed 32-bit `x >= 0`: true when bit 31 is clear, so complement `x` and
;; shift the inverted bit 31 down to bit 0.
;;
;; NOTE(review): the `not` is typed `$I64` while the shift is typed `$I32`.
;; The 32-bit shift only looks at bit 31, so the result appears unaffected,
;; but `$I32` would be the consistent choice -- confirm before changing, since
;; it would alter the emitted instruction.
(rule (lower (has_type $B1
                       (icmp (IntCC.SignedGreaterThanOrEqual)
                             x @ (value_type $I32)
                             (u64_from_iconst 0))))
      (x64_shr $I32
               (x64_not $I64 x)
               (Imm8Reg.Imm8 31)))
;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than
;; one. To note: what is different here about the output values is that each
;; lane will be filled with all 1s or all 0s according to the comparison,

View File

@@ -73,3 +73,135 @@ block2:
; popq %rbp
; ret
;; Signed i64 `x < 0` with the zero constant as the second operand; the b1
;; result is returned directly. The expected output that follows is a single
;; `shrq $63` instead of a compare.
function %test_x_slt_0_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp slt v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;; Signed i32 `x < 0` with the zero constant as the second operand; the b1
;; result is returned directly. The expected output that follows is a single
;; `shrl $31`.
function %test_x_slt_0_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp slt v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;; Signed i64 `0 > x` with the zero constant as the first operand; the b1
;; result is returned directly. The expected output that follows is a single
;; `shrq $63`.
function %test_0_sgt_x_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sgt v1, v0
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;; Signed i32 `0 > x` with the zero constant as the first operand; the b1
;; result is returned directly. The expected output that follows is a single
;; `shrl $31`.
function %test_0_sgt_x_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp sgt v1, v0
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;; Signed i64 `0 <= x` with the zero constant as the first operand; the b1
;; result is returned directly. The expected output that follows is `notq`
;; followed by `shrq $63`.
function %test_0_sle_x_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sle v1, v0
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;; Signed i32 `0 <= x` with the zero constant as the first operand; the b1
;; result is returned directly. The expected output that follows is a 64-bit
;; `notq` followed by a 32-bit `shrl $31`.
function %test_0_sle_x_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp sle v1, v0
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;; Signed i64 `x >= 0` with the zero constant as the second operand; the b1
;; result is returned directly. The expected output that follows is `notq`
;; followed by `shrq $63`.
;; NOTE(review): the name says `sge_x`, but the comparison is against the
;; constant 0 -- `sge_0` was presumably intended.
function %test_x_sge_x_i64(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp sge v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrq $63, %rdi, %rdi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;; Signed i32 `x >= 0` with the zero constant as the second operand; the b1
;; result is returned directly. The expected output that follows is a 64-bit
;; `notq` followed by a 32-bit `shrl $31`.
;; NOTE(review): the name says `sge_x`, but the comparison is against the
;; constant 0 -- `sge_0` was presumably intended.
function %test_x_sge_x_i32f4(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp sge v0, v1
return v2
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; notq %rdi, %rdi
; shrl $31, %edi, %edi
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

View File

@@ -223,3 +223,63 @@ block2:
; popq %rbp
; ret
;; Signed i64 `x < 0` whose result is consumed by `brnz` rather than returned.
;; The expected output that follows keeps a `cmpq $0` / `jl` pair for the
;; branch; no shift is emitted for the comparison.
function %f6(i64) -> b1 {
block0(v0: i64):
v1 = iconst.i64 0
v2 = icmp slt v0, v1
brnz v2, block1
jump block2
block1:
v3 = bconst.b1 true
return v3
block2:
v4 = bconst.b1 false
return v4
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpq $0, %rdi
; jl label1; j label2
; block1:
; movl $1, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; xorl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;; Signed i32 `x < 0` whose result is consumed by `brnz` rather than returned.
;; The expected output that follows keeps a `cmpl $0` / `jl` pair for the
;; branch; no shift is emitted for the comparison.
function %f7(i32) -> b1 {
block0(v0: i32):
v1 = iconst.i32 0
v2 = icmp slt v0, v1
brnz v2, block1
jump block2
block1:
v3 = bconst.b1 true
return v3
block2:
v4 = bconst.b1 false
return v4
}
; pushq %rbp
; movq %rsp, %rbp
; block0:
; cmpl $0, %edi
; jl label1; j label2
; block1:
; movl $1, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
; block2:
; xorl %eax, %eax, %eax
; movq %rbp, %rsp
; popq %rbp
; ret