x64: Peephole optimization for x < 0 (#4625)
https://github.com/bytecodealliance/wasmtime/pull/4625 Fixes #4607
This commit is contained in:
@@ -1501,6 +1501,38 @@
|
||||
;; `icmp` whose first operand `a` is an `$I128` value: the comparison built by
;; `emit_cmp` for condition code `cc` is handed to `lower_icmp_bool` to form
;; the lowered result.
;; NOTE(review): both helpers are defined outside this view; what they emit is
;; not shown here.
(rule (lower (icmp cc a @ (value_type $I128) b))
|
||||
(lower_icmp_bool (emit_cmp cc a b)))
|
||||
|
||||
;; Peephole optimization for `x < 0`, when x is a signed 64 bit value
;;
;; Matches a `$B1`-typed `icmp` with condition `SignedLessThan` whose left
;; operand `x` is an `$I64` value and whose right operand is the constant 0.
;; The whole comparison is lowered to one `x64_shr` of `x` by 63: that moves
;; the sign bit (set exactly when `x` is negative) down to bit 0, leaving 1
;; or 0 in the result register.
|
||||
(rule (lower (has_type $B1 (icmp (IntCC.SignedLessThan) x @ (value_type $I64) (u64_from_iconst 0))))
|
||||
(x64_shr $I64 x (Imm8Reg.Imm8 63)))
|
||||
|
||||
;; Peephole optimization for `0 > x`, when x is a signed 64 bit value
;;
;; Operand-swapped form of `x < 0`: the constant 0 is the left operand of a
;; `SignedGreaterThan` compare and the `$I64` value `x` is on the right.
;; Lowered to `x64_shr` of `x` by 63, which leaves only the sign bit of `x`
;; (1 when `x` is negative, 0 otherwise) in bit 0.
|
||||
(rule (lower (has_type $B1 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I64))))
|
||||
(x64_shr $I64 x (Imm8Reg.Imm8 63)))
|
||||
|
||||
;; Peephole optimization for `0 <= x`, when x is a signed 64 bit value
;;
;; Matches `SignedLessThanOrEqual` with the constant 0 on the left and the
;; `$I64` value `x` on the right. `x` is first bitwise-inverted with
;; `x64_not`, so the top bit becomes 1 exactly when `x` is non-negative; the
;; following `x64_shr` by 63 moves that bit down to bit 0.
|
||||
(rule (lower (has_type $B1 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I64))))
|
||||
(x64_shr $I64 (x64_not $I64 x) (Imm8Reg.Imm8 63)))
|
||||
|
||||
;; Peephole optimization for `x >= 0`, when x is a signed 64 bit value
;;
;; Matches `SignedGreaterThanOrEqual` with the `$I64` value `x` on the left
;; and the constant 0 on the right. Lowered as NOT-then-shift: `x64_not`
;; flips the sign bit so it is 1 exactly when `x` is non-negative, and
;; `x64_shr` by 63 moves it down to bit 0.
|
||||
(rule (lower (has_type $B1 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I64) (u64_from_iconst 0))))
|
||||
(x64_shr $I64 (x64_not $I64 x) (Imm8Reg.Imm8 63)))
|
||||
|
||||
;; Peephole optimization for `x < 0`, when x is a signed 32 bit value
;;
;; 32-bit counterpart of the 64-bit `x < 0` rule: `x` is an `$I32` value
;; compared `SignedLessThan` against the constant 0. The sign bit is bit 31
;; here, so the lowering is an `$I32`-typed `x64_shr` by 31.
|
||||
(rule (lower (has_type $B1 (icmp (IntCC.SignedLessThan) x @ (value_type $I32) (u64_from_iconst 0))))
|
||||
(x64_shr $I32 x (Imm8Reg.Imm8 31)))
|
||||
|
||||
;; Peephole optimization for `0 > x`, when x is a signed 32 bit value
;;
;; Operand-swapped form of the 32-bit `x < 0` rule: constant 0 on the left of
;; a `SignedGreaterThan` compare, `$I32` value `x` on the right. Lowered to
;; an `$I32`-typed `x64_shr` by 31, which leaves the sign bit of `x` in bit 0.
|
||||
(rule (lower (has_type $B1 (icmp (IntCC.SignedGreaterThan) (u64_from_iconst 0) x @ (value_type $I32))))
|
||||
(x64_shr $I32 x (Imm8Reg.Imm8 31)))
|
||||
|
||||
;; Peephole optimization for `0 <= x`, when x is a signed 32 bit value
;;
;; Matches `SignedLessThanOrEqual` with the constant 0 on the left and the
;; `$I32` value `x` on the right. `x` is inverted with `x64_not` and the
;; result is shifted right by 31 with an `$I32`-typed `x64_shr`, so bit 0 ends
;; up holding the inverted bit 31 of `x` (1 exactly when `x` is non-negative).
;;
;; NOTE(review): the NOT is emitted with type `$I64` although `x` is `$I32`.
;; This looks harmless (NOT is bitwise, and the shift is `$I32`-typed), but
;; `$I32` would be consistent with the rest of the rule. Changing it would
;; also change the `notq` line in the expected test output -- confirm before
;; touching.
|
||||
(rule (lower (has_type $B1 (icmp (IntCC.SignedLessThanOrEqual) (u64_from_iconst 0) x @ (value_type $I32))))
|
||||
(x64_shr $I32 (x64_not $I64 x) (Imm8Reg.Imm8 31)))
|
||||
|
||||
;; Peephole optimization for `x >= 0`, when x is a signed 32 bit value
;;
;; Matches `SignedGreaterThanOrEqual` with the `$I32` value `x` on the left
;; and the constant 0 on the right. Lowered as NOT-then-shift: `x64_not`
;; flips bit 31 so it is 1 exactly when `x` is non-negative, and the
;; `$I32`-typed `x64_shr` by 31 moves it down to bit 0.
;;
;; NOTE(review): the NOT is emitted with type `$I64` although `x` is `$I32`.
;; This looks harmless (NOT is bitwise, and the shift is `$I32`-typed), but
;; `$I32` would be consistent with the rest of the rule. Changing it would
;; also change the `notq` line in the expected test output -- confirm before
;; touching.
|
||||
(rule (lower (has_type $B1 (icmp (IntCC.SignedGreaterThanOrEqual) x @ (value_type $I32) (u64_from_iconst 0))))
|
||||
(x64_shr $I32 (x64_not $I64 x) (Imm8Reg.Imm8 31)))
|
||||
|
||||
;; For XMM-held values, we lower to `PCMP*` instructions, sometimes more than
|
||||
;; one. To note: what is different here about the output values is that each
|
||||
;; lane will be filled with all 1s or all 0s according to the comparison,
|
||||
|
||||
@@ -73,3 +73,135 @@ block2:
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; Returns `v0 < 0` (signed, i64, constant on the right) directly as a `b1`.
;; The expected output that follows is a single `shrq $63` on the argument.
function %test_x_slt_0_i64(i64) -> b1 {
|
||||
block0(v0: i64):
|
||||
v1 = iconst.i64 0
|
||||
v2 = icmp slt v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; shrq $63, %rdi, %rdi
|
||||
; movq %rdi, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; Returns `v0 < 0` (signed, i32, constant on the right) directly as a `b1`.
;; The expected output that follows is a single `shrl $31` on the argument.
;; NOTE(review): the `f4` suffix in the function name looks accidental --
;; confirm.
function %test_x_slt_0_i32f4(i32) -> b1 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i32 0
|
||||
v2 = icmp slt v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; shrl $31, %edi, %edi
|
||||
; movq %rdi, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; Returns `0 > v0` (signed, i64, constant on the left) directly as a `b1`.
;; The expected output that follows is a single `shrq $63` on the argument.
function %test_0_sgt_x_i64(i64) -> b1 {
|
||||
block0(v0: i64):
|
||||
v1 = iconst.i64 0
|
||||
v2 = icmp sgt v1, v0
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; shrq $63, %rdi, %rdi
|
||||
; movq %rdi, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; Returns `0 > v0` (signed, i32, constant on the left) directly as a `b1`.
;; The expected output that follows is a single `shrl $31` on the argument.
;; NOTE(review): the `f4` suffix in the function name looks accidental --
;; confirm.
function %test_0_sgt_x_i32f4(i32) -> b1 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i32 0
|
||||
v2 = icmp sgt v1, v0
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; shrl $31, %edi, %edi
|
||||
; movq %rdi, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; Returns `0 <= v0` (signed, i64, constant on the left) directly as a `b1`.
;; The expected output that follows is `notq` followed by `shrq $63`.
function %test_0_sle_x_i64(i64) -> b1 {
|
||||
block0(v0: i64):
|
||||
v1 = iconst.i64 0
|
||||
v2 = icmp sle v1, v0
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; notq %rdi, %rdi
|
||||
; shrq $63, %rdi, %rdi
|
||||
; movq %rdi, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; Returns `0 <= v0` (signed, i32, constant on the left) directly as a `b1`.
;; The expected output that follows is a 64-bit `notq` followed by a 32-bit
;; `shrl $31`.
;; NOTE(review): the `f4` suffix in the function name looks accidental --
;; confirm.
function %test_0_sle_x_i32f4(i32) -> b1 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i32 0
|
||||
v2 = icmp sle v1, v0
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; notq %rdi, %rdi
|
||||
; shrl $31, %edi, %edi
|
||||
; movq %rdi, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; Returns `v0 >= 0` (signed, i64, constant on the right) directly as a `b1`.
;; The expected output that follows is `notq` followed by `shrq $63`.
;; NOTE(review): the name says `x_sge_x`, but the body compares `v0` against
;; the constant 0; `x_sge_0` would match the sibling tests' naming.
function %test_x_sge_x_i64(i64) -> b1 {
|
||||
block0(v0: i64):
|
||||
v1 = iconst.i64 0
|
||||
v2 = icmp sge v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; notq %rdi, %rdi
|
||||
; shrq $63, %rdi, %rdi
|
||||
; movq %rdi, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; Returns `v0 >= 0` (signed, i32, constant on the right) directly as a `b1`.
;; The expected output that follows is a 64-bit `notq` followed by a 32-bit
;; `shrl $31`.
;; NOTE(review): the name says `x_sge_x`, but the body compares `v0` against
;; the constant 0; the `f4` suffix also looks accidental -- confirm.
function %test_x_sge_x_i32f4(i32) -> b1 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i32 0
|
||||
v2 = icmp sge v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; notq %rdi, %rdi
|
||||
; shrl $31, %edi, %edi
|
||||
; movq %rdi, %rax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
|
||||
@@ -223,3 +223,63 @@ block2:
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; `v0 < 0` (signed, i64) used as the condition of `brnz` rather than being
;; returned: block1 returns true, block2 returns false. The expected output
;; that follows uses `cmpq $0` plus `jl` for the branch, not a shift.
function %f6(i64) -> b1 {
|
||||
block0(v0: i64):
|
||||
v1 = iconst.i64 0
|
||||
v2 = icmp slt v0, v1
|
||||
brnz v2, block1
|
||||
jump block2
|
||||
block1:
|
||||
v3 = bconst.b1 true
|
||||
return v3
|
||||
block2:
|
||||
v4 = bconst.b1 false
|
||||
return v4
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; cmpq $0, %rdi
|
||||
; jl label1; j label2
|
||||
; block1:
|
||||
; movl $1, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
; block2:
|
||||
; xorl %eax, %eax, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
;; `v0 < 0` (signed, i32) used as the condition of `brnz` rather than being
;; returned: block1 returns true, block2 returns false. The expected output
;; that follows uses `cmpl $0` plus `jl` for the branch, not a shift.
function %f7(i32) -> b1 {
|
||||
block0(v0: i32):
|
||||
v1 = iconst.i32 0
|
||||
v2 = icmp slt v0, v1
|
||||
brnz v2, block1
|
||||
jump block2
|
||||
block1:
|
||||
v3 = bconst.b1 true
|
||||
return v3
|
||||
block2:
|
||||
v4 = bconst.b1 false
|
||||
return v4
|
||||
}
|
||||
|
||||
; pushq %rbp
|
||||
; movq %rsp, %rbp
|
||||
; block0:
|
||||
; cmpl $0, %edi
|
||||
; jl label1; j label2
|
||||
; block1:
|
||||
; movl $1, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
; block2:
|
||||
; xorl %eax, %eax, %eax
|
||||
; movq %rbp, %rsp
|
||||
; popq %rbp
|
||||
; ret
|
||||
|
||||
|
||||
Reference in New Issue
Block a user