Optimize sign extension via shifts (#6220)

* Optimize sign extension via shifts

This commit adds egraph optimization patterns that recognize
left-shifting a value and then right-shifting it by the same amount as
a form of sign-extending its lower bits. This matches the behavior of
the WebAssembly `i32.extend8_s` instruction, for example. Note that the
lowering of that WebAssembly instruction does not use shifts; rather,
historical versions of LLVM that didn't support the instruction, or
versions with the instruction disabled, emit this shift sequence
instead.
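
For illustration, here is a minimal sketch in plain Rust (not the
actual optimization rules added by this commit) of the identity these
patterns rely on, using the 8-bit-in-32-bit case that corresponds to
`i32.extend8_s`:

// Sign-extend the low 8 bits of a 32-bit value via shifts: move the
// byte up to the top, then arithmetic-shift it back down.
fn extend8_s_via_shifts(x: i32) -> i32 {
    (x << 24) >> 24 // `>>` on a signed type is an arithmetic shift
}

// The same operation written as a narrowing cast followed by a
// sign-extending cast, i.e. `sextend(ireduce(x))` in CLIF terms.
fn extend8_s_via_narrowing(x: i32) -> i32 {
    (x as i8) as i32
}

fn main() {
    for x in [0, 0x7f, 0x80, 0xff, 0x1234_5678, -1, i32::MIN] {
        assert_eq!(extend8_s_via_shifts(x), extend8_s_via_narrowing(x));
    }
}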

A second rule was also added recognizing that reducing an extended
value back to its original width yields the original value, which keeps
an existing shift-related test passing as well.
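
As a similar plain-Rust sketch (again, an illustration rather than the
rule itself): widening a value and then narrowing it back to its
original width always returns the original value.

// `ireduce.i16(sextend.i32(x))` expressed with Rust casts: widen a
// 16-bit value to 32 bits, then truncate it back to 16 bits.
fn reduce_of_sextend(x: i16) -> i16 {
    (x as i32) as i16
}

fn main() {
    // The round trip is the identity for every 16-bit input.
    for x in [0i16, 1, -1, i16::MIN, i16::MAX] {
        assert_eq!(reduce_of_sextend(x), x);
    }
}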

* Add reference assemblies for new opts
Alex Crichton authored on 2023-04-17 13:48:08 -05:00, committed by GitHub
parent 9a4bd7c6df, commit 7ebff82861
5 changed files with 407 additions and 0 deletions


@@ -53,3 +53,21 @@ block0(v1: i16):
; check: v4 = sextend.i64 v1
; check: return v4
function %sextend_then_reduce(i16) -> i16 {
block0(v1: i16):
v2 = sextend.i32 v1
v3 = ireduce.i16 v2
return v3
}
; check: return v1
function %uextend_then_reduce(i32) -> i32 {
block0(v1: i32):
v2 = uextend.i64 v1
v3 = ireduce.i32 v2
return v3
}
; check: return v1


@@ -215,3 +215,103 @@ block0(v0: i64):
; check: v4 = sshr v0, v1
; check: return v4
}
function %i32_shl_sshr_8_to_ireduce(i32) -> i32 {
block0(v0: i32):
v1 = ishl_imm v0, 24
v2 = sshr_imm v1, 24
return v2
; check: v5 = ireduce.i8 v0
; check: v6 = sextend.i32 v5
; check: return v6
}
function %i32_shl_sshr_16_to_ireduce(i32) -> i32 {
block0(v0: i32):
v1 = ishl_imm v0, 16
v2 = sshr_imm v1, 16
return v2
; check: v5 = ireduce.i16 v0
; check: v6 = sextend.i32 v5
; check: return v6
}
function %i64_shl_sshr_8_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 56
v2 = sshr_imm v1, 56
return v2
; check: v5 = ireduce.i8 v0
; check: v6 = sextend.i64 v5
; check: return v6
}
function %i64_shl_sshr_16_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 48
v2 = sshr_imm v1, 48
return v2
; check: v5 = ireduce.i16 v0
; check: v6 = sextend.i64 v5
; check: return v6
}
function %i64_shl_sshr_32_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 32
v2 = sshr_imm v1, 32
return v2
; check: v5 = ireduce.i32 v0
; check: v6 = sextend.i64 v5
; check: return v6
}
function %i32_shl_ushr_8_to_ireduce(i32) -> i32 {
block0(v0: i32):
v1 = ishl_imm v0, 24
v2 = ushr_imm v1, 24
return v2
; check: v7 = ireduce.i8 v0
; check: v8 = uextend.i32 v7
; check: return v8
}
function %i32_shl_ushr_16_to_ireduce(i32) -> i32 {
block0(v0: i32):
v1 = ishl_imm v0, 16
v2 = ushr_imm v1, 16
return v2
; check: v7 = ireduce.i16 v0
; check: v8 = uextend.i32 v7
; check: return v8
}
function %i64_shl_ushr_8_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 56
v2 = ushr_imm v1, 56
return v2
; check: v7 = ireduce.i8 v0
; check: v8 = uextend.i64 v7
; check: return v8
}
function %i64_shl_ushr_16_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 48
v2 = ushr_imm v1, 48
return v2
; check: v7 = ireduce.i16 v0
; check: v8 = uextend.i64 v7
; check: return v8
}
function %i64_shl_ushr_32_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 32
v2 = ushr_imm v1, 32
return v2
; check: v7 = ireduce.i32 v0
; check: v8 = uextend.i64 v7
; check: return v8
}


@@ -0,0 +1,265 @@
test compile precise-output
set opt_level=speed
target x86_64
function %i32_shl_sshr_8_to_ireduce(i32) -> i32 {
block0(v0: i32):
v1 = ishl_imm v0, 24
v2 = sshr_imm v1, 24
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movsbl %dil, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movsbl %dil, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
function %i32_shl_sshr_16_to_ireduce(i32) -> i32 {
block0(v0: i32):
v1 = ishl_imm v0, 16
v2 = sshr_imm v1, 16
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movswl %di, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movswl %di, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
function %i64_shl_sshr_8_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 56
v2 = sshr_imm v1, 56
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movsbq %dil, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movsbq %dil, %rax
; movq %rbp, %rsp
; popq %rbp
; retq
function %i64_shl_sshr_16_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 48
v2 = sshr_imm v1, 48
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movswq %di, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movswq %di, %rax
; movq %rbp, %rsp
; popq %rbp
; retq
function %i64_shl_sshr_32_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 32
v2 = sshr_imm v1, 32
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movslq %edi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movslq %edi, %rax
; movq %rbp, %rsp
; popq %rbp
; retq
function %i32_shl_ushr_8_to_ireduce(i32) -> i32 {
block0(v0: i32):
v1 = ishl_imm v0, 24
v2 = ushr_imm v1, 24
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzbl %dil, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movzbl %dil, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
function %i32_shl_ushr_16_to_ireduce(i32) -> i32 {
block0(v0: i32):
v1 = ishl_imm v0, 16
v2 = ushr_imm v1, 16
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzwl %di, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movzwl %di, %eax
; movq %rbp, %rsp
; popq %rbp
; retq
function %i64_shl_ushr_8_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 56
v2 = ushr_imm v1, 56
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzbq %dil, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movzbq %dil, %rax
; movq %rbp, %rsp
; popq %rbp
; retq
function %i64_shl_ushr_16_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 48
v2 = ushr_imm v1, 48
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movzwq %di, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movzwq %di, %rax
; movq %rbp, %rsp
; popq %rbp
; retq
function %i64_shl_ushr_32_to_ireduce(i64) -> i64 {
block0(v0: i64):
v1 = ishl_imm v0, 32
v2 = ushr_imm v1, 32
return v2
}
; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movl %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movl %edi, %eax
; movq %rbp, %rsp
; popq %rbp
; retq