From 52ba72f3412357e2dde616df9c6b954d34431ebe Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 4 Jan 2023 01:37:25 +0000 Subject: [PATCH] riscv64: Fix masking on `iabs` (#5505) * cranelift: Add `iabs.i128` runtest * riscv64: Fix incorrect extension in iabs When lowering iabs, we were accidentally comparing the unextended value this caused the instruction to misbehave with certain top bits. This commit also adds a zbb lowering that does not use jumps. --- cranelift/codegen/meta/src/isa/riscv64.rs | 1 + cranelift/codegen/src/isa/riscv64/inst.isle | 47 ++++++++++++++--- cranelift/codegen/src/isa/riscv64/lower.isle | 9 ++-- .../codegen/src/isa/riscv64/lower/isle.rs | 3 ++ .../filetests/isa/riscv64/iabs-zbb.clif | 50 +++++++++++++++++++ .../filetests/filetests/isa/riscv64/iabs.clif | 50 +++++++++++++++++++ .../filetests/runtests/i128-iabs.clif | 13 +++++ .../filetests/filetests/runtests/iabs.clif | 14 +++++- 8 files changed, 172 insertions(+), 15 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/iabs-zbb.clif create mode 100644 cranelift/filetests/filetests/isa/riscv64/iabs.clif create mode 100644 cranelift/filetests/filetests/runtests/i128-iabs.clif diff --git a/cranelift/codegen/meta/src/isa/riscv64.rs b/cranelift/codegen/meta/src/isa/riscv64.rs index 7f392efd02..3b1cc62548 100644 --- a/cranelift/codegen/meta/src/isa/riscv64.rs +++ b/cranelift/codegen/meta/src/isa/riscv64.rs @@ -14,6 +14,7 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup { let _has_b = setting.add_bool("has_b", "has extension B?", "", false); let _has_c = setting.add_bool("has_c", "has extension C?", "", false); let _has_zbkb = setting.add_bool("has_zbkb", "has extension zbkb?", "", false); + let _has_zbb = setting.add_bool("has_zbb", "has extension zbb?", "", false); let _has_zicsr = setting.add_bool("has_zicsr", "has extension zicsr?", "", false); let _has_zifencei = setting.add_bool("has_zifencei", "has extension zifencei?", "", false); diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 9d4042b221..6fc3b254de 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -2011,9 +2011,13 @@ (step WritableReg (temp_writable_reg $I64)) (_ Unit (emit (MInst.Rev8 rs step tmp rd)))) (writable_reg_to_reg rd))) + (decl pure has_zbkb () bool) (extern constructor has_zbkb has_zbkb) +(decl pure has_zbb () bool) +(extern constructor has_zbb has_zbb) + (decl gen_brev8 (Reg Type) Reg) (rule 1 (gen_brev8 rs _) @@ -2038,14 +2042,41 @@ ((tmp Reg (gen_bit_not y))) (alu_rrr (AluOPRRR.Xor) x tmp))) -(decl lower_iabs (Reg Type) Reg) -(rule - (lower_iabs r ty) - (let - ((tmp Reg (ext_int_if_need $true r ty)) - (a Reg (gen_bit_not r)) - (a2 Reg (alu_rr_imm12 (AluOPRRI.Addi) a (imm12_const 1)))) - (gen_select_reg (IntCC.SignedLessThan) r (zero_reg) a2 r))) +;; Negates x +;; Equivalent to 0 - x +(decl neg (Type ValueRegs) ValueRegs) +(rule 1 (neg (fits_in_64 (ty_int ty)) val) + (value_reg + (alu_rrr (AluOPRRR.Sub) (zero_reg) (value_regs_get val 0)))) + +(rule 2 (neg $I128 val) + (i128_sub (value_regs_zero) val)) + + +;; Selects the greatest of two registers as signed values. +(decl max (Type Reg Reg) Reg) +(rule (max (fits_in_64 (ty_int ty)) x y) + (if-let $true (has_zbb)) + (alu_rrr (AluOPRRR.Max) x y)) + +(rule (max (fits_in_64 (ty_int ty)) x y) + (if-let $false (has_zbb)) + (gen_select_reg (IntCC.SignedGreaterThan) x y x y)) + + +(decl lower_iabs (Type Reg) Reg) + +; I64 and lower +; Generate the following code: +; sext.{b,h,w} a0, a0 +; neg a1, a0 +; max a0, a0, a1 +(rule (lower_iabs (fits_in_64 ty) val) + (let ((extended Reg (ext_int_if_need $true val ty)) + (negated Reg (neg $I64 extended))) + (max $I64 extended negated))) + + (decl gen_trapff (FloatCC Reg Reg Type TrapCode) InstOutput) (rule diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index e951378a40..6ec81a3a5d 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -76,11 +76,8 @@ ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; `i64` and smaller. -(rule 1 (lower (has_type (fits_in_64 ty) (ineg x))) - (alu_rrr (AluOPRRR.Sub) (zero_reg) x)) - -(rule 2 (lower (has_type $I128 (ineg x))) - (i128_sub (value_regs_zero) x)) +(rule (lower (has_type ty (ineg val))) + (neg ty val)) ;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -871,7 +868,7 @@ ;;; Rules for `iabs` ;;;;;;;;;;;;; (rule (lower (has_type (fits_in_64 ty) (iabs x))) - (lower_iabs x ty)) + (lower_iabs ty x)) ;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 1454855eeb..b414148b6a 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -306,6 +306,9 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> { fn has_zbkb(&mut self) -> bool { self.backend.isa_flags.has_zbkb() } + fn has_zbb(&mut self) -> bool { + self.backend.isa_flags.has_zbb() + } fn inst_output_get(&mut self, x: InstOutput, index: u8) -> ValueRegs { x[index as usize] diff --git a/cranelift/filetests/filetests/isa/riscv64/iabs-zbb.clif b/cranelift/filetests/filetests/isa/riscv64/iabs-zbb.clif new file mode 100644 index 0000000000..6ca55dd82c --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/iabs-zbb.clif @@ -0,0 +1,50 @@ +test compile precise-output +target riscv64 has_zbb=true + +function %iabs_i8(i8) -> i8 { +block0(v0: i8): + v1 = iabs v0 + return v1 +} + +; block0: +; sext.b t2,a0 +; sub a1,zero,t2 +; max a0,t2,a1 +; ret + +function %iabs_i16(i16) -> i16 { +block0(v0: i16): + v1 = iabs v0 + return v1 +} + +; block0: +; sext.h t2,a0 +; sub a1,zero,t2 +; max a0,t2,a1 +; ret + +function %iabs_i32(i32) -> i32 { +block0(v0: i32): + v1 = iabs v0 + return v1 +} + +; block0: +; sext.w t2,a0 +; sub a1,zero,t2 +; max a0,t2,a1 +; ret + +function %iabs_i64(i64) -> i64 { +block0(v0: i64): + v1 = iabs v0 + return v1 +} + +; block0: +; sub t2,zero,a0 +; max a0,a0,t2 +; ret + diff --git a/cranelift/filetests/filetests/isa/riscv64/iabs.clif b/cranelift/filetests/filetests/isa/riscv64/iabs.clif new file mode 100644 index 0000000000..92dd1fe9b4 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/iabs.clif @@ -0,0 +1,50 @@ +test compile precise-output +target riscv64 has_zbb=false + +function %iabs_i8(i8) -> i8 { +block0(v0: i8): + v1 = iabs v0 + return v1 +} + +; block0: +; sext.b t2,a0 +; sub a1,zero,t2 +; select_reg a0,t2,a1##condition=(t2 sgt a1) +; ret + +function %iabs_i16(i16) -> i16 { +block0(v0: i16): + v1 = iabs v0 + return v1 +} + +; block0: +; sext.h t2,a0 +; sub a1,zero,t2 +; select_reg a0,t2,a1##condition=(t2 sgt a1) +; ret + +function %iabs_i32(i32) -> i32 { +block0(v0: i32): + v1 = iabs v0 + return v1 +} + +; block0: +; sext.w t2,a0 +; sub a1,zero,t2 +; select_reg a0,t2,a1##condition=(t2 sgt a1) +; ret + +function %iabs_i64(i64) -> i64 { +block0(v0: i64): + v1 = iabs v0 + return v1 +} + +; block0: +; sub t2,zero,a0 +; select_reg a0,a0,t2##condition=(a0 sgt t2) +; ret + diff --git a/cranelift/filetests/filetests/runtests/i128-iabs.clif b/cranelift/filetests/filetests/runtests/i128-iabs.clif new file mode 100644 index 0000000000..c84f2c03e0 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-iabs.clif @@ -0,0 +1,13 @@ +test interpret +test run +target s390x + +function %iabs_i128(i128) -> i128 { +block0(v0: i128): + v1 = iabs v0 + return v1 +} +; run: %iabs_i128(0) == 0 +; run: %iabs_i128(-1) == 1 +; run: %iabs_i128(1) == 1 +; run: %iabs_i128(0x80000000_00000000_00000000_00000000) == 0x80000000_00000000_00000000_00000000 diff --git a/cranelift/filetests/filetests/runtests/iabs.clif b/cranelift/filetests/filetests/runtests/iabs.clif index 3048f02bb0..bf6b51a32c 100644 --- a/cranelift/filetests/filetests/runtests/iabs.clif +++ b/cranelift/filetests/filetests/runtests/iabs.clif @@ -2,7 +2,8 @@ test interpret test run target aarch64 target s390x -target riscv64 +target riscv64 has_zbb=false +target riscv64 has_zbb=true ; x86_64 only supports vector iabs function %iabs_i8(i8) -> i8 { @@ -44,3 +45,14 @@ block0(v0: i64): ; run: %iabs_i64(9223372036854775807) == 9223372036854775807 ; run: %iabs_i64(-9223372036854775807) == 9223372036854775807 ; run: %iabs_i64(-9223372036854775808) == -9223372036854775808 + + +; See issue #5501. +; If iabs does not mask the high bits on the input, it can give an incorrect result. +function %iabs_i16_mask(i16, i64) -> i16 system_v { +block0(v0: i16, v1: i64): + v2 = ushr v0, v1 + v3 = iabs v2 + return v3 +} +; run: %iabs_i16_mask(-24064, 16) == 24064