riscv64: Fix masking on iabs (#5505)
* cranelift: Add `iabs.i128` runtest * riscv64: Fix incorrect extension in iabs When lowering iabs, we were accidentally comparing the unextended value this caused the instruction to misbehave with certain top bits. This commit also adds a zbb lowering that does not use jumps.
This commit is contained in:
@@ -14,6 +14,7 @@ fn define_settings(_shared: &SettingGroup) -> SettingGroup {
|
|||||||
let _has_b = setting.add_bool("has_b", "has extension B?", "", false);
|
let _has_b = setting.add_bool("has_b", "has extension B?", "", false);
|
||||||
let _has_c = setting.add_bool("has_c", "has extension C?", "", false);
|
let _has_c = setting.add_bool("has_c", "has extension C?", "", false);
|
||||||
let _has_zbkb = setting.add_bool("has_zbkb", "has extension zbkb?", "", false);
|
let _has_zbkb = setting.add_bool("has_zbkb", "has extension zbkb?", "", false);
|
||||||
|
let _has_zbb = setting.add_bool("has_zbb", "has extension zbb?", "", false);
|
||||||
|
|
||||||
let _has_zicsr = setting.add_bool("has_zicsr", "has extension zicsr?", "", false);
|
let _has_zicsr = setting.add_bool("has_zicsr", "has extension zicsr?", "", false);
|
||||||
let _has_zifencei = setting.add_bool("has_zifencei", "has extension zifencei?", "", false);
|
let _has_zifencei = setting.add_bool("has_zifencei", "has extension zifencei?", "", false);
|
||||||
|
|||||||
@@ -2011,9 +2011,13 @@
|
|||||||
(step WritableReg (temp_writable_reg $I64))
|
(step WritableReg (temp_writable_reg $I64))
|
||||||
(_ Unit (emit (MInst.Rev8 rs step tmp rd))))
|
(_ Unit (emit (MInst.Rev8 rs step tmp rd))))
|
||||||
(writable_reg_to_reg rd)))
|
(writable_reg_to_reg rd)))
|
||||||
|
|
||||||
(decl pure has_zbkb () bool)
|
(decl pure has_zbkb () bool)
|
||||||
(extern constructor has_zbkb has_zbkb)
|
(extern constructor has_zbkb has_zbkb)
|
||||||
|
|
||||||
|
(decl pure has_zbb () bool)
|
||||||
|
(extern constructor has_zbb has_zbb)
|
||||||
|
|
||||||
(decl gen_brev8 (Reg Type) Reg)
|
(decl gen_brev8 (Reg Type) Reg)
|
||||||
(rule 1
|
(rule 1
|
||||||
(gen_brev8 rs _)
|
(gen_brev8 rs _)
|
||||||
@@ -2038,14 +2042,41 @@
|
|||||||
((tmp Reg (gen_bit_not y)))
|
((tmp Reg (gen_bit_not y)))
|
||||||
(alu_rrr (AluOPRRR.Xor) x tmp)))
|
(alu_rrr (AluOPRRR.Xor) x tmp)))
|
||||||
|
|
||||||
(decl lower_iabs (Reg Type) Reg)
|
;; Negates x
|
||||||
(rule
|
;; Equivalent to 0 - x
|
||||||
(lower_iabs r ty)
|
(decl neg (Type ValueRegs) ValueRegs)
|
||||||
(let
|
(rule 1 (neg (fits_in_64 (ty_int ty)) val)
|
||||||
((tmp Reg (ext_int_if_need $true r ty))
|
(value_reg
|
||||||
(a Reg (gen_bit_not r))
|
(alu_rrr (AluOPRRR.Sub) (zero_reg) (value_regs_get val 0))))
|
||||||
(a2 Reg (alu_rr_imm12 (AluOPRRI.Addi) a (imm12_const 1))))
|
|
||||||
(gen_select_reg (IntCC.SignedLessThan) r (zero_reg) a2 r)))
|
(rule 2 (neg $I128 val)
|
||||||
|
(i128_sub (value_regs_zero) val))
|
||||||
|
|
||||||
|
|
||||||
|
;; Selects the greatest of two registers as signed values.
|
||||||
|
(decl max (Type Reg Reg) Reg)
|
||||||
|
(rule (max (fits_in_64 (ty_int ty)) x y)
|
||||||
|
(if-let $true (has_zbb))
|
||||||
|
(alu_rrr (AluOPRRR.Max) x y))
|
||||||
|
|
||||||
|
(rule (max (fits_in_64 (ty_int ty)) x y)
|
||||||
|
(if-let $false (has_zbb))
|
||||||
|
(gen_select_reg (IntCC.SignedGreaterThan) x y x y))
|
||||||
|
|
||||||
|
|
||||||
|
(decl lower_iabs (Type Reg) Reg)
|
||||||
|
|
||||||
|
; I64 and lower
|
||||||
|
; Generate the following code:
|
||||||
|
; sext.{b,h,w} a0, a0
|
||||||
|
; neg a1, a0
|
||||||
|
; max a0, a0, a1
|
||||||
|
(rule (lower_iabs (fits_in_64 ty) val)
|
||||||
|
(let ((extended Reg (ext_int_if_need $true val ty))
|
||||||
|
(negated Reg (neg $I64 extended)))
|
||||||
|
(max $I64 extended negated)))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(decl gen_trapff (FloatCC Reg Reg Type TrapCode) InstOutput)
|
(decl gen_trapff (FloatCC Reg Reg Type TrapCode) InstOutput)
|
||||||
(rule
|
(rule
|
||||||
|
|||||||
@@ -76,11 +76,8 @@
|
|||||||
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; `i64` and smaller.
|
;; `i64` and smaller.
|
||||||
(rule 1 (lower (has_type (fits_in_64 ty) (ineg x)))
|
(rule (lower (has_type ty (ineg val)))
|
||||||
(alu_rrr (AluOPRRR.Sub) (zero_reg) x))
|
(neg ty val))
|
||||||
|
|
||||||
(rule 2 (lower (has_type $I128 (ineg x)))
|
|
||||||
(i128_sub (value_regs_zero) x))
|
|
||||||
|
|
||||||
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -871,7 +868,7 @@
|
|||||||
;;; Rules for `iabs` ;;;;;;;;;;;;;
|
;;; Rules for `iabs` ;;;;;;;;;;;;;
|
||||||
(rule
|
(rule
|
||||||
(lower (has_type (fits_in_64 ty) (iabs x)))
|
(lower (has_type (fits_in_64 ty) (iabs x)))
|
||||||
(lower_iabs x ty))
|
(lower_iabs ty x))
|
||||||
|
|
||||||
;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
|||||||
@@ -306,6 +306,9 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
|
|||||||
fn has_zbkb(&mut self) -> bool {
|
fn has_zbkb(&mut self) -> bool {
|
||||||
self.backend.isa_flags.has_zbkb()
|
self.backend.isa_flags.has_zbkb()
|
||||||
}
|
}
|
||||||
|
fn has_zbb(&mut self) -> bool {
|
||||||
|
self.backend.isa_flags.has_zbb()
|
||||||
|
}
|
||||||
|
|
||||||
fn inst_output_get(&mut self, x: InstOutput, index: u8) -> ValueRegs {
|
fn inst_output_get(&mut self, x: InstOutput, index: u8) -> ValueRegs {
|
||||||
x[index as usize]
|
x[index as usize]
|
||||||
|
|||||||
50
cranelift/filetests/filetests/isa/riscv64/iabs-zbb.clif
Normal file
50
cranelift/filetests/filetests/isa/riscv64/iabs-zbb.clif
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
test compile precise-output
|
||||||
|
target riscv64 has_zbb=true
|
||||||
|
|
||||||
|
function %iabs_i8(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sext.b t2,a0
|
||||||
|
; sub a1,zero,t2
|
||||||
|
; max a0,t2,a1
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %iabs_i16(i16) -> i16 {
|
||||||
|
block0(v0: i16):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sext.h t2,a0
|
||||||
|
; sub a1,zero,t2
|
||||||
|
; max a0,t2,a1
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %iabs_i32(i32) -> i32 {
|
||||||
|
block0(v0: i32):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sext.w t2,a0
|
||||||
|
; sub a1,zero,t2
|
||||||
|
; max a0,t2,a1
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %iabs_i64(i64) -> i64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sub t2,zero,a0
|
||||||
|
; max a0,a0,t2
|
||||||
|
; ret
|
||||||
|
|
||||||
50
cranelift/filetests/filetests/isa/riscv64/iabs.clif
Normal file
50
cranelift/filetests/filetests/isa/riscv64/iabs.clif
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
test compile precise-output
|
||||||
|
target riscv64 has_zbb=false
|
||||||
|
|
||||||
|
function %iabs_i8(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sext.b t2,a0
|
||||||
|
; sub a1,zero,t2
|
||||||
|
; select_reg a0,t2,a1##condition=(t2 sgt a1)
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %iabs_i16(i16) -> i16 {
|
||||||
|
block0(v0: i16):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sext.h t2,a0
|
||||||
|
; sub a1,zero,t2
|
||||||
|
; select_reg a0,t2,a1##condition=(t2 sgt a1)
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %iabs_i32(i32) -> i32 {
|
||||||
|
block0(v0: i32):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sext.w t2,a0
|
||||||
|
; sub a1,zero,t2
|
||||||
|
; select_reg a0,t2,a1##condition=(t2 sgt a1)
|
||||||
|
; ret
|
||||||
|
|
||||||
|
function %iabs_i64(i64) -> i64 {
|
||||||
|
block0(v0: i64):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; sub t2,zero,a0
|
||||||
|
; select_reg a0,a0,t2##condition=(a0 sgt t2)
|
||||||
|
; ret
|
||||||
|
|
||||||
13
cranelift/filetests/filetests/runtests/i128-iabs.clif
Normal file
13
cranelift/filetests/filetests/runtests/i128-iabs.clif
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
test interpret
|
||||||
|
test run
|
||||||
|
target s390x
|
||||||
|
|
||||||
|
function %iabs_i128(i128) -> i128 {
|
||||||
|
block0(v0: i128):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %iabs_i128(0) == 0
|
||||||
|
; run: %iabs_i128(-1) == 1
|
||||||
|
; run: %iabs_i128(1) == 1
|
||||||
|
; run: %iabs_i128(0x80000000_00000000_00000000_00000000) == 0x80000000_00000000_00000000_00000000
|
||||||
@@ -2,7 +2,8 @@ test interpret
|
|||||||
test run
|
test run
|
||||||
target aarch64
|
target aarch64
|
||||||
target s390x
|
target s390x
|
||||||
target riscv64
|
target riscv64 has_zbb=false
|
||||||
|
target riscv64 has_zbb=true
|
||||||
; x86_64 only supports vector iabs
|
; x86_64 only supports vector iabs
|
||||||
|
|
||||||
function %iabs_i8(i8) -> i8 {
|
function %iabs_i8(i8) -> i8 {
|
||||||
@@ -44,3 +45,14 @@ block0(v0: i64):
|
|||||||
; run: %iabs_i64(9223372036854775807) == 9223372036854775807
|
; run: %iabs_i64(9223372036854775807) == 9223372036854775807
|
||||||
; run: %iabs_i64(-9223372036854775807) == 9223372036854775807
|
; run: %iabs_i64(-9223372036854775807) == 9223372036854775807
|
||||||
; run: %iabs_i64(-9223372036854775808) == -9223372036854775808
|
; run: %iabs_i64(-9223372036854775808) == -9223372036854775808
|
||||||
|
|
||||||
|
|
||||||
|
; See issue #5501.
|
||||||
|
; If iabs does not mask the high bits on the input, it can give an incorrect result.
|
||||||
|
function %iabs_i16_mask(i16, i64) -> i16 system_v {
|
||||||
|
block0(v0: i16, v1: i64):
|
||||||
|
v2 = ushr v0, v1
|
||||||
|
v3 = iabs v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
; run: %iabs_i16_mask(-24064, 16) == 24064
|
||||||
|
|||||||
Reference in New Issue
Block a user