diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index aa7b296078..f7239a2adf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1704,9 +1704,6 @@ (decl fpu_op_ri_sli (u8 u8) FPUOpRIMod) (extern constructor fpu_op_ri_sli fpu_op_ri_sli) -(decl imm12_from_negated_u64 (Imm12) u64) -(extern extractor imm12_from_negated_u64 imm12_from_negated_u64) - (decl pure partial lshr_from_u64 (Type u64) ShiftOpAndAmt) (extern constructor lshr_from_u64 lshr_from_u64) @@ -1734,11 +1731,13 @@ (imm12_from_value n) (iconst (u64_from_imm64 (imm12_from_u64 n)))) -;; Same as `imm12_from_value`, but tries negating the constant value. -(decl imm12_from_negated_value (Imm12) Value) -(extractor - (imm12_from_negated_value n) - (iconst (u64_from_imm64 (imm12_from_negated_u64 n)))) +;; Conceptually the same as `imm12_from_value`, but tries negating the constant +;; value (first sign-extending to handle narrow widths). +(decl pure partial imm12_from_negated_value (Value) Imm12) +(rule + (imm12_from_negated_value (has_type ty (iconst n))) + (if-let (imm12_from_u64 imm) (i64_as_u64 (i64_neg (i64_sextend_imm64 ty n)))) + imm) ;; Helper type to represent a value and an extend operation fused together. (type ExtendedValue extern (enum)) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 9f3025309a..bafd73330b 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -56,11 +56,13 @@ ;; Same as the previous special cases, except we can switch the addition to a ;; subtraction if the negated immediate fits in 12 bits. 
-(rule 2 (lower (has_type (fits_in_64 ty) (iadd x (imm12_from_negated_value y)))) - (sub_imm ty x y)) +(rule 2 (lower (has_type (fits_in_64 ty) (iadd x y))) + (if-let imm12_neg (imm12_from_negated_value y)) + (sub_imm ty x imm12_neg)) -(rule 3 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_negated_value x) y))) - (sub_imm ty y x)) +(rule 3 (lower (has_type (fits_in_64 ty) (iadd x y))) + (if-let imm12_neg (imm12_from_negated_value x)) + (sub_imm ty y imm12_neg)) ;; Special cases for when we're adding an extended register where the extending ;; operation can get folded into the add itself. @@ -650,8 +652,9 @@ ;; Same as the previous special case, except we can switch the subtraction to an ;; addition if the negated immediate fits in 12 bits. -(rule 2 (lower (has_type (fits_in_64 ty) (isub x (imm12_from_negated_value y)))) - (add_imm ty x y)) +(rule 2 (lower (has_type (fits_in_64 ty) (isub x y))) + (if-let imm12_neg (imm12_from_negated_value y)) + (add_imm ty x imm12_neg)) ;; Special cases for when we're subtracting an extended register where the ;; extending operation can get folded into the sub itself. diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index a3fe4eb02b..c5d661d24a 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -126,10 +126,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { Imm12::maybe_from_u64(n) } - fn imm12_from_negated_u64(&mut self, n: u64) -> Option<Imm12> { - Imm12::maybe_from_u64((n as i64).wrapping_neg() as u64) - } - fn imm_shift_from_u8(&mut self, n: u8) -> ImmShift { ImmShift::maybe_from_u64(n.into()).unwrap() } diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index d557d53f93..749e8bb4ee 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -38,6 +38,11 @@ macro_rules! 
isle_common_prelude_methods { x as u64 } + #[inline] + fn i64_neg(&mut self, x: i64) -> i64 { + x.wrapping_neg() + } + #[inline] fn u64_add(&mut self, x: u64, y: u64) -> u64 { x.wrapping_add(y) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 5461109fdd..8228f72dd5 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -93,6 +93,9 @@ (decl pure i64_as_u64 (i64) u64) (extern constructor i64_as_u64 i64_as_u64) +(decl pure i64_neg (i64) i64) +(extern constructor i64_neg i64_neg) + (decl u128_as_u64 (u64) u128) (extern extractor u128_as_u64 u128_as_u64) diff --git a/cranelift/filetests/filetests/isa/aarch64/iconst-imm12_from_negated.isle b/cranelift/filetests/filetests/isa/aarch64/iconst-imm12_from_negated.isle index 61067e316d..1b9f210585 100644 --- a/cranelift/filetests/filetests/isa/aarch64/iconst-imm12_from_negated.isle +++ b/cranelift/filetests/filetests/isa/aarch64/iconst-imm12_from_negated.isle @@ -24,7 +24,6 @@ block0(v0: i64): ; ret ;; 4294967295 is zero-extended i32 -1 -;; Result should use immediate but currently doesn't function %b(i32) -> i32 { block0(v0: i32): v1 = iconst.i32 4294967295 @@ -34,18 +33,15 @@ block0(v0: i32): ; VCode: ; block0: -; movn w2, #0 -; add w0, w0, w2 +; sub w0, w0, #1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; mov w2, #-1 -; add w0, w0, w2 +; sub w0, w0, #1 ; ret ;; 65535 is zero-extended i16 -1 -;; Result should use immediate but currently doesn't function %a(i16) -> i16 { block0(v0: i16): v1 = iconst.i16 65535 @@ -55,12 +51,122 @@ block0(v0: i16): ; VCode: ; block0: -; movz w2, #65535 -; add w0, w0, w2 +; sub w0, w0, #1 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; mov w2, #0xffff -; add w0, w0, w2 +; sub w0, w0, #1 +; ret + +;; Swapped order to trigger commutative rule +function %c(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -1 + v3 = iadd v1, v0 + return v3 +} + +; VCode: +; block0: +; sub x0, x0, #1 +; ret +; +; Disassembled: +; 
block0: ; offset 0x0 +; sub x0, x0, #1 +; ret + +;; Swapped order to trigger commutative rule +;; 4294967295 is zero-extended i32 -1 +function %b(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 4294967295 + v3 = iadd v1, v0 + return v3 +} + +; VCode: +; block0: +; sub w0, w0, #1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; sub w0, w0, #1 +; ret + +;; Swapped order to trigger commutative rule +;; 65535 is zero-extended i16 -1 +function %a(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 65535 + v3 = iadd v1, v0 + return v3 +} + +; VCode: +; block0: +; sub w0, w0, #1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; sub w0, w0, #1 +; ret + +;; Version sub -> add +function %c(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -1 + v3 = isub v0, v1 + return v3 +} + +; VCode: +; block0: +; add x0, x0, #1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; add x0, x0, #1 +; ret + +;; Version sub -> add +;; 4294967295 is zero-extended i32 -1 +function %b(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 4294967295 + v3 = isub v0, v1 + return v3 +} + +; VCode: +; block0: +; add w0, w0, #1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; add w0, w0, #1 +; ret + +;; Version sub -> add +;; 65535 is zero-extended i16 -1 +function %a(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 65535 + v3 = isub v0, v1 + return v3 +} + +; VCode: +; block0: +; add w0, w0, #1 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; add w0, w0, #1 ; ret