diff --git a/cranelift/codegen/src/opts/extends.isle b/cranelift/codegen/src/opts/extends.isle
index a64fba1b6c..a1a4dfda05 100644
--- a/cranelift/codegen/src/opts/extends.isle
+++ b/cranelift/codegen/src/opts/extends.isle
@@ -27,3 +27,8 @@
                 (uextend $I64 x @ (value_type $I32))
                 (iconst _ (u64_from_imm64 0))))
       (iconst ty (imm64 1)))
+
+;; A reduction-of-an-extend back to the same original type is the same as not
+;; actually doing the extend in the first place.
+(rule (simplify (ireduce ty (sextend _ x @ (value_type ty)))) x)
+(rule (simplify (ireduce ty (uextend _ x @ (value_type ty)))) x)
diff --git a/cranelift/codegen/src/opts/shifts.isle b/cranelift/codegen/src/opts/shifts.isle
index 44c79a6f8d..8dcf153d1a 100644
--- a/cranelift/codegen/src/opts/shifts.isle
+++ b/cranelift/codegen/src/opts/shifts.isle
@@ -79,6 +79,25 @@
       (if-let $true (u64_le shift_u64
                             (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
       x)
+;; (x << N) >> N == x as T_SMALL as T_LARGE
+;; if N == bitsizeof(T_LARGE) - bitsizeof(T_SMALL)
+;;
+;; Note that the shift is required to be >0 to ensure this doesn't accidentally
+;; try to `ireduce` a type to itself, which isn't a valid use of `ireduce`.
+(rule (simplify (sshr ty (ishl ty x (iconst _ shift)) (iconst _ shift)))
+      (if-let (u64_from_imm64 (u64_nonzero shift_u64)) shift)
+      (if-let ty_small (shift_amt_to_type (u64_sub (ty_bits ty) shift_u64)))
+      (sextend ty (ireduce ty_small x)))
+(rule (simplify (ushr ty (ishl ty x (iconst _ shift)) (iconst _ shift)))
+      (if-let (u64_from_imm64 (u64_nonzero shift_u64)) shift)
+      (if-let ty_small (shift_amt_to_type (u64_sub (ty_bits ty) shift_u64)))
+      (uextend ty (ireduce ty_small x)))
+
+(decl pure partial shift_amt_to_type (u64) Type)
+(rule (shift_amt_to_type 8) $I8)
+(rule (shift_amt_to_type 16) $I16)
+(rule (shift_amt_to_type 32) $I32)
+
 ;; ineg(ushr(x, k)) == sshr(x, k) when k == ty_bits - 1.
 (rule (simplify (ineg ty (ushr ty x sconst @ (iconst ty (u64_from_imm64 shift_amt)))))
       (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1)))
diff --git a/cranelift/filetests/filetests/egraph/extends.clif b/cranelift/filetests/filetests/egraph/extends.clif
index bfc9876044..03454ca6d0 100644
--- a/cranelift/filetests/filetests/egraph/extends.clif
+++ b/cranelift/filetests/filetests/egraph/extends.clif
@@ -53,3 +53,21 @@ block0(v1: i16):
 
 ; check: v4 = sextend.i64 v1
 ; check: return v4
+
+function %sextend_then_reduce(i16) -> i16 {
+block0(v1: i16):
+    v2 = sextend.i32 v1
+    v3 = ireduce.i16 v2
+    return v3
+}
+
+; check: return v1
+
+function %uextend_then_reduce(i32) -> i32 {
+block0(v1: i32):
+    v2 = uextend.i64 v1
+    v3 = ireduce.i32 v2
+    return v3
+}
+
+; check: return v1
diff --git a/cranelift/filetests/filetests/egraph/shifts.clif b/cranelift/filetests/filetests/egraph/shifts.clif
index d9c9da277d..b8028c50e6 100644
--- a/cranelift/filetests/filetests/egraph/shifts.clif
+++ b/cranelift/filetests/filetests/egraph/shifts.clif
@@ -215,3 +215,103 @@ block0(v0: i64):
     ; check: v4 = sshr v0, v1
     ; check: return v4
 }
+
+function %i32_shl_sshr_8_to_ireduce(i32) -> i32 {
+block0(v0: i32):
+    v1 = ishl_imm v0, 24
+    v2 = sshr_imm v1, 24
+    return v2
+    ; check: v5 = ireduce.i8 v0
+    ; check: v6 = sextend.i32 v5
+    ; check: return v6
+}
+
+function %i32_shl_sshr_16_to_ireduce(i32) -> i32 {
+block0(v0: i32):
+    v1 = ishl_imm v0, 16
+    v2 = sshr_imm v1, 16
+    return v2
+    ; check: v5 = ireduce.i16 v0
+    ; check: v6 = sextend.i32 v5
+    ; check: return v6
+}
+
+function %i64_shl_sshr_8_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 56
+    v2 = sshr_imm v1, 56
+    return v2
+    ; check: v5 = ireduce.i8 v0
+    ; check: v6 = sextend.i64 v5
+    ; check: return v6
+}
+
+function %i64_shl_sshr_16_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 48
+    v2 = sshr_imm v1, 48
+    return v2
+    ; check: v5 = ireduce.i16 v0
+    ; check: v6 = sextend.i64 v5
+    ; check: return v6
+}
+
+function %i64_shl_sshr_32_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 32
+    v2 = sshr_imm v1, 32
+    return v2
+    ; check: v5 = ireduce.i32 v0
+    ; check: v6 = sextend.i64 v5
+    ; check: return v6
+}
+
+function %i32_shl_ushr_8_to_ireduce(i32) -> i32 {
+block0(v0: i32):
+    v1 = ishl_imm v0, 24
+    v2 = ushr_imm v1, 24
+    return v2
+    ; check: v7 = ireduce.i8 v0
+    ; check: v8 = uextend.i32 v7
+    ; check: return v8
+}
+
+function %i32_shl_ushr_16_to_ireduce(i32) -> i32 {
+block0(v0: i32):
+    v1 = ishl_imm v0, 16
+    v2 = ushr_imm v1, 16
+    return v2
+    ; check: v7 = ireduce.i16 v0
+    ; check: v8 = uextend.i32 v7
+    ; check: return v8
+}
+
+function %i64_shl_ushr_8_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 56
+    v2 = ushr_imm v1, 56
+    return v2
+    ; check: v7 = ireduce.i8 v0
+    ; check: v8 = uextend.i64 v7
+    ; check: return v8
+}
+
+function %i64_shl_ushr_16_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 48
+    v2 = ushr_imm v1, 48
+    return v2
+    ; check: v7 = ireduce.i16 v0
+    ; check: v8 = uextend.i64 v7
+    ; check: return v8
+}
+
+function %i64_shl_ushr_32_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 32
+    v2 = ushr_imm v1, 32
+    return v2
+    ; check: v7 = ireduce.i32 v0
+    ; check: v8 = uextend.i64 v7
+    ; check: return v8
+}
diff --git a/cranelift/filetests/filetests/isa/x64/shift-to-extend.clif b/cranelift/filetests/filetests/isa/x64/shift-to-extend.clif
new file mode 100644
index 0000000000..219fe75a78
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/shift-to-extend.clif
@@ -0,0 +1,265 @@
+test compile precise-output
+set opt_level=speed
+target x86_64
+
+
+function %i32_shl_sshr_8_to_ireduce(i32) -> i32 {
+block0(v0: i32):
+    v1 = ishl_imm v0, 24
+    v2 = sshr_imm v1, 24
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movsbl %dil, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movsbl %dil, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i32_shl_sshr_16_to_ireduce(i32) -> i32 {
+block0(v0: i32):
+    v1 = ishl_imm v0, 16
+    v2 = sshr_imm v1, 16
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movswl %di, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movswl %di, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_shl_sshr_8_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 56
+    v2 = sshr_imm v1, 56
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movsbq %dil, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movsbq %dil, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_shl_sshr_16_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 48
+    v2 = sshr_imm v1, 48
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movswq %di, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movswq %di, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_shl_sshr_32_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 32
+    v2 = sshr_imm v1, 32
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movslq %edi, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movslq %edi, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i32_shl_ushr_8_to_ireduce(i32) -> i32 {
+block0(v0: i32):
+    v1 = ishl_imm v0, 24
+    v2 = ushr_imm v1, 24
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movzbl %dil, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movzbl %dil, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i32_shl_ushr_16_to_ireduce(i32) -> i32 {
+block0(v0: i32):
+    v1 = ishl_imm v0, 16
+    v2 = ushr_imm v1, 16
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movzwl %di, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movzwl %di, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_shl_ushr_8_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 56
+    v2 = ushr_imm v1, 56
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movzbq %dil, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movzbq %dil, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_shl_ushr_16_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 48
+    v2 = ushr_imm v1, 48
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movzwq %di, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movzwq %di, %rax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
+function %i64_shl_ushr_32_to_ireduce(i64) -> i64 {
+block0(v0: i64):
+    v1 = ishl_imm v0, 32
+    v2 = ushr_imm v1, 32
+    return v2
+}
+
+; VCode:
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block0:
+;   movl %edi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   pushq %rbp
+;   movq %rsp, %rbp
+; block1: ; offset 0x4
+;   movl %edi, %eax
+;   movq %rbp, %rsp
+;   popq %rbp
+;   retq
+
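For context, a minimal sketch of the source-level idioms these rules target. This is a hypothetical illustration, not part of the patch; the function names are invented for the example.

// Rust analogues of the patterns the ISLE rules rewrite.

// (x << 24) >> 24 with an arithmetic shift is the classic idiom for
// sign-extending the low 8 bits of a wider value; the sshr-of-ishl rule
// rewrites the CLIF form of this into `sextend(ireduce.i8 x)`.
fn sext8(x: i32) -> i32 {
    (x << 24) >> 24
}

// The logical-shift version keeps only the low 8 bits, matching the
// ushr-of-ishl rule's rewrite into `uextend(ireduce.i8 x)`.
fn zext8(x: u32) -> u32 {
    (x << 24) >> 24
}

// Extending and then reducing back to the original width is the identity,
// which is what the new ireduce-of-extend rules in extends.isle express.
fn extend_then_reduce(x: i16) -> i16 {
    (x as i32) as i16
}

fn main() {
    assert_eq!(sext8(0xFF), -1);            // low byte 0xFF sign-extends to -1
    assert_eq!(zext8(0x1FF), 0xFF);         // only the low byte survives
    assert_eq!(extend_then_reduce(-5), -5); // round-trip is a no-op
}

On x64 these idioms reduce to single movsx/movzx-family instructions, which is exactly what the precise-output tests above verify.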