riscv64: Fix regalloc panic with bor+bnot on floats (#5857)

This commit is contained in:
Afonso Bordado
2023-03-13 18:29:36 +00:00
committed by GitHub
parent d03612c2d9
commit ad0bce3a36
2 changed files with 110 additions and 19 deletions

View File

@@ -186,14 +186,14 @@
(alu_rrr (AluOPRRR.RemU) x y)))
;;;; Rules for `and` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 ty) (band x y)))
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (band x y)))
(alu_rrr (AluOPRRR.And) x y))
;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule 2 (lower (has_type (fits_in_64 ty) (band x (imm12_from_value y))))
(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (band x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Andi) x y))
(rule 1 (lower (has_type (fits_in_64 ty) (band (imm12_from_value x) y)))
(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (band (imm12_from_value x) y)))
(alu_rr_imm12 (AluOPRRI.Andi) y x))
(rule (lower (has_type $I128 (band x y)))
@@ -201,6 +201,7 @@
(rule (lower (has_type $F32 (band x y)))
(lower_float_binary (AluOPRRR.And) x y $F32))
(rule (lower (has_type $F64 (band x y)))
(lower_float_binary (AluOPRRR.And) x y $F64))
@@ -208,18 +209,21 @@
;; by Cranelift's `band_not` instruction that is legalized into the simpler
;; forms early on.
(rule 3 (lower (has_type (fits_in_64 ty) (band x (bnot y))))
(rule 3 (lower (has_type (fits_in_64 (ty_int ty)) (band x (bnot y))))
(if-let $true (has_zbb))
(gen_andn x y))
(rule 4 (lower (has_type (fits_in_64 ty) (band (bnot y) x)))
(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (band (bnot y) x)))
(if-let $true (has_zbb))
(gen_andn x y))
(rule 5 (lower (has_type $I128 (band x (bnot y))))
(if-let $true (has_zbb))
(let
((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0)))
(high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))
(rule 6 (lower (has_type $I128 (band (bnot y) x)))
(if-let $true (has_zbb))
(let
@@ -229,19 +233,22 @@
;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 ty) (bor x y)))
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bor x y)))
(alu_rrr (AluOPRRR.Or) x y))
;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule 2 (lower (has_type (fits_in_64 ty) (bor x (imm12_from_value y))))
(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Ori) x y))
(rule 1 (lower (has_type (fits_in_64 ty) (bor (imm12_from_value x) y)))
(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bor (imm12_from_value x) y)))
(alu_rr_imm12 (AluOPRRI.Ori) y x))
(rule (lower (has_type $I128 (bor x y)))
(lower_b128_binary (AluOPRRR.Or) x y))
(rule (lower (has_type $F32 (bor x y)))
(lower_float_binary (AluOPRRR.Or) x y $F32))
(rule (lower (has_type $F64 (bor x y)))
(lower_float_binary (AluOPRRR.Or) x y $F64))
@@ -249,10 +256,11 @@
;; by Cranelift's `bor_not` instruction that is legalized into the simpler
;; forms early on.
(rule 3 (lower (has_type (fits_in_64 ty) (bor x (bnot y))))
(rule 3 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (bnot y))))
(if-let $true (has_zbb))
(gen_orn x y))
(rule 4 (lower (has_type (fits_in_64 ty) (bor (bnot y) x)))
(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (bor (bnot y) x)))
(if-let $true (has_zbb))
(gen_orn x y))
@@ -262,6 +270,7 @@
((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0)))
(high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1))))
(value_regs low high)))
(rule 6 (lower (has_type $I128 (bor (bnot y) x)))
(if-let $true (has_zbb))
(let
@@ -271,40 +280,43 @@
;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type (fits_in_64 ty) (bxor x y)))
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y)))
(alu_rrr (AluOPRRR.Xor) x y))
;; Special cases for when one operand is an immediate that fits in 12 bits.
(rule 2 (lower (has_type (fits_in_64 ty) (bxor x (imm12_from_value y))))
(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x (imm12_from_value y))))
(alu_rr_imm12 (AluOPRRI.Xori) x y))
(rule 1 (lower (has_type (fits_in_64 ty) (bxor (imm12_from_value x) y)))
(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor (imm12_from_value x) y)))
(alu_rr_imm12 (AluOPRRI.Xori) y x))
(rule (lower (has_type $I128 (bxor x y)))
(lower_b128_binary (AluOPRRR.Xor) x y))
(rule (lower (has_type $F32 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F32))
(rule (lower (has_type $F64 (bxor x y)))
(lower_float_binary (AluOPRRR.Xor) x y $F64))
;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1 (lower (has_type fits_in_64 (bnot x)))
(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bnot x)))
(alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_neg_bits -1)))
(rule (lower (has_type $I128 (bnot x)))
(bnot_128 x))
(rule
(lower (has_type $F32 (bnot x)))
(lower_float_bnot x $F32)
)
(lower_float_bnot x $F32))
(rule
(lower (has_type $F64 (bnot x)))
(lower_float_bnot x $F64)
)
(lower_float_bnot x $F64))
;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (bitrev x)))
(rule (lower (has_type (fits_in_64 (ty_int ty)) (bitrev x)))
(lower_bit_reverse x ty))
(rule 1 (lower (has_type $I128 (bitrev x)))

View File

@@ -0,0 +1,79 @@
test compile precise-output
set unwind_info=false
target riscv64 has_zbb
;; This is a regression test for a bug in the RISC-V backend where,
;; when `Zbb` was enabled, the backend would try to use one of the
;; integer instructions (`orn`) to implement a float operation,
;; causing a regalloc panic.
;; Minimal reproducer: a `bor` whose operands are both the result of a
;; `bnot` on an f32 value. The float result is only consumed as a block
;; argument; the function itself returns the integer constant `v0`.
function %or_not_optimization_float() -> i32 system_v {
block0:
v0 = iconst.i32 0
v1 = f32const 0.0
;; Bitwise NOT applied to a float-typed value.
v2 = bnot v1
;; `bor` of a `bnot` result on f32 -- presumably the shape that used to
;; be matched by the integer-only `orn` lowering (NOTE(review): confirm
;; against the `bor x (bnot y)` rules in the lowering file).
v3 = bor v2, v2
;; Both br_table targets are block1; they differ only in the f32
;; argument passed along (v3 vs. v1), which keeps v3 live.
br_table v0, block1(v3), [block1(v1)]
;; v4 is never read; the function returns v0.
block1(v4: f32):
return v0
}
; VCode:
; block0:
; li a1,0
; fmv.w.x ft9,a1
; li t1,0
; fmv.w.x fa6,t1
; fmv.x.w a1,fa6
; not a3,a1
; fmv.w.x ft1,a3
; fmv.x.w t1,ft1
; fmv.x.w a0,ft1
; or a2,t1,a0
; fmv.w.x fa2,a2
; li t2,0
; br_table t2,[MachLabel(1),MachLabel(2)]##tmp1=a1,tmp2=a2
; block1:
; j label3
; block2:
; fmv.d fa2,ft9
; j label3
; block3:
; li a0,0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; mv a1, zero
; fmv.w.x ft9, a1
; mv t1, zero
; fmv.w.x fa6, t1
; fmv.x.w a1, fa6
; not a3, a1
; fmv.w.x ft1, a3
; fmv.x.w t1, ft1
; fmv.x.w a0, ft1
; or a2, t1, a0
; fmv.w.x fa2, a2
; mv t2, zero
; slli t6, t2, 0x20
; srli t6, t6, 0x20
; addi a2, zero, 1
; bltu t6, a2, 0xc
; auipc a2, 0
; jalr zero, a2, 0x28
; auipc a1, 0
; slli a2, t6, 3
; add a1, a1, a2
; jalr zero, a1, 0x10
; auipc a2, 0
; jalr zero, a2, 0xc
; block1: ; offset 0x60
; j 8
; block2: ; offset 0x64
; fmv.d fa2, ft9
; block3: ; offset 0x68
; mv a0, zero
; ret