diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle
index 163d0cb63a..fd69eb4bdd 100644
--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -186,14 +186,14 @@
   (alu_rrr (AluOPRRR.RemU) x y)))

 ;;;; Rules for `and` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule -1 (lower (has_type (fits_in_64 ty) (band x y)))
+(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (band x y)))
   (alu_rrr (AluOPRRR.And) x y))

 ;; Special cases for when one operand is an immediate that fits in 12 bits.
-(rule 2 (lower (has_type (fits_in_64 ty) (band x (imm12_from_value y))))
+(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (band x (imm12_from_value y))))
   (alu_rr_imm12 (AluOPRRI.Andi) x y))
-(rule 1 (lower (has_type (fits_in_64 ty) (band (imm12_from_value x) y)))
+(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (band (imm12_from_value x) y)))
   (alu_rr_imm12 (AluOPRRI.Andi) y x))

 (rule (lower (has_type $I128 (band x y)))
@@ -201,6 +201,7 @@
 (rule (lower (has_type $F32 (band x y)))
   (lower_float_binary (AluOPRRR.And) x y $F32))
+
 (rule (lower (has_type $F64 (band x y)))
   (lower_float_binary (AluOPRRR.And) x y $F64))
@@ -208,18 +209,21 @@
 ;; by Cranelift's `band_not` instruction that is legalized into the simpler
 ;; forms early on.

-(rule 3 (lower (has_type (fits_in_64 ty) (band x (bnot y))))
+(rule 3 (lower (has_type (fits_in_64 (ty_int ty)) (band x (bnot y))))
   (if-let $true (has_zbb))
   (gen_andn x y))
-(rule 4 (lower (has_type (fits_in_64 ty) (band (bnot y) x)))
+
+(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (band (bnot y) x)))
   (if-let $true (has_zbb))
   (gen_andn x y))
+
 (rule 5 (lower (has_type $I128 (band x (bnot y))))
   (if-let $true (has_zbb))
   (let
     ((low Reg (gen_andn (value_regs_get x 0) (value_regs_get y 0)))
      (high Reg (gen_andn (value_regs_get x 1) (value_regs_get y 1))))
     (value_regs low high)))
+
 (rule 6 (lower (has_type $I128 (band (bnot y) x)))
   (if-let $true (has_zbb))
   (let
@@ -229,19 +233,22 @@
 ;;;; Rules for `or` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule -1 (lower (has_type (fits_in_64 ty) (bor x y)))
+(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bor x y)))
   (alu_rrr (AluOPRRR.Or) x y))

 ;; Special cases for when one operand is an immediate that fits in 12 bits.
-(rule 2 (lower (has_type (fits_in_64 ty) (bor x (imm12_from_value y))))
+(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (imm12_from_value y))))
   (alu_rr_imm12 (AluOPRRI.Ori) x y))
-(rule 1 (lower (has_type (fits_in_64 ty) (bor (imm12_from_value x) y)))
+(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bor (imm12_from_value x) y)))
   (alu_rr_imm12 (AluOPRRI.Ori) y x))
+
 (rule (lower (has_type $I128 (bor x y)))
   (lower_b128_binary (AluOPRRR.Or) x y))
+
 (rule (lower (has_type $F32 (bor x y)))
   (lower_float_binary (AluOPRRR.Or) x y $F32))
+
 (rule (lower (has_type $F64 (bor x y)))
   (lower_float_binary (AluOPRRR.Or) x y $F64))
@@ -249,10 +256,11 @@
 ;; by Cranelift's `bor_not` instruction that is legalized into the simpler
 ;; forms early on.
-(rule 3 (lower (has_type (fits_in_64 ty) (bor x (bnot y))))
+(rule 3 (lower (has_type (fits_in_64 (ty_int ty)) (bor x (bnot y))))
   (if-let $true (has_zbb))
   (gen_orn x y))
-(rule 4 (lower (has_type (fits_in_64 ty) (bor (bnot y) x)))
+
+(rule 4 (lower (has_type (fits_in_64 (ty_int ty)) (bor (bnot y) x)))
   (if-let $true (has_zbb))
   (gen_orn x y))
@@ -262,6 +270,7 @@
     ((low Reg (gen_orn (value_regs_get x 0) (value_regs_get y 0)))
      (high Reg (gen_orn (value_regs_get x 1) (value_regs_get y 1))))
     (value_regs low high)))
+
 (rule 6 (lower (has_type $I128 (bor (bnot y) x)))
   (if-let $true (has_zbb))
   (let
@@ -271,40 +280,43 @@

 ;;;; Rules for `xor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule -1 (lower (has_type (fits_in_64 ty) (bxor x y)))
+(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x y)))
   (alu_rrr (AluOPRRR.Xor) x y))

 ;; Special cases for when one operand is an immediate that fits in 12 bits.
-(rule 2 (lower (has_type (fits_in_64 ty) (bxor x (imm12_from_value y))))
+(rule 2 (lower (has_type (fits_in_64 (ty_int ty)) (bxor x (imm12_from_value y))))
   (alu_rr_imm12 (AluOPRRI.Xori) x y))
-(rule 1 (lower (has_type (fits_in_64 ty) (bxor (imm12_from_value x) y)))
+(rule 1 (lower (has_type (fits_in_64 (ty_int ty)) (bxor (imm12_from_value x) y)))
   (alu_rr_imm12 (AluOPRRI.Xori) y x))
+
 (rule (lower (has_type $I128 (bxor x y)))
   (lower_b128_binary (AluOPRRR.Xor) x y))
+
 (rule (lower (has_type $F32 (bxor x y)))
   (lower_float_binary (AluOPRRR.Xor) x y $F32))
+
 (rule (lower (has_type $F64 (bxor x y)))
   (lower_float_binary (AluOPRRR.Xor) x y $F64))

 ;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule -1 (lower (has_type fits_in_64 (bnot x)))
+(rule -1 (lower (has_type (fits_in_64 (ty_int ty)) (bnot x)))
   (alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_neg_bits -1)))

 (rule (lower (has_type $I128 (bnot x)))
   (bnot_128 x))
+
 (rule (lower (has_type $F32 (bnot x)))
-  (lower_float_bnot x $F32)
-)
+  (lower_float_bnot x $F32))
+
 (rule (lower (has_type $F64 (bnot x)))
-  (lower_float_bnot x $F64)
-)
+  (lower_float_bnot x $F64))

 ;;;; Rules for `bit_reverse` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (has_type ty (bitrev x)))
+(rule (lower (has_type (fits_in_64 (ty_int ty)) (bitrev x)))
   (lower_bit_reverse x ty))

 (rule 1 (lower (has_type $I128 (bitrev x)))
diff --git a/cranelift/filetests/filetests/isa/riscv64/bitops-float.clif b/cranelift/filetests/filetests/isa/riscv64/bitops-float.clif
new file mode 100644
index 0000000000..ffd36afd07
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/riscv64/bitops-float.clif
@@ -0,0 +1,79 @@
+test compile precise-output
+set unwind_info=false
+target riscv64 has_zbb
+
+;; This is a regression test for a bug in the RISC-V backend where, when
+;; `Zbb` was enabled, the backend would try to use one of the integer
+;; instructions (`orn`) to implement a float operation, causing a
+;; regalloc panic.
+
+function %or_not_optimization_float() -> i32 system_v {
+block0:
+  v0 = iconst.i32 0
+  v1 = f32const 0.0
+  v2 = bnot v1
+  v3 = bor v2, v2
+  br_table v0, block1(v3), [block1(v1)]
+
+block1(v4: f32):
+  return v0
+}
+
+; VCode:
+; block0:
+;   li a1,0
+;   fmv.w.x ft9,a1
+;   li t1,0
+;   fmv.w.x fa6,t1
+;   fmv.x.w a1,fa6
+;   not a3,a1
+;   fmv.w.x ft1,a3
+;   fmv.x.w t1,ft1
+;   fmv.x.w a0,ft1
+;   or a2,t1,a0
+;   fmv.w.x fa2,a2
+;   li t2,0
+;   br_table t2,[MachLabel(1),MachLabel(2)]##tmp1=a1,tmp2=a2
+; block1:
+;   j label3
+; block2:
+;   fmv.d fa2,ft9
+;   j label3
+; block3:
+;   li a0,0
+;   ret
+;
+; Disassembled:
+; block0: ; offset 0x0
+;   mv a1, zero
+;   fmv.w.x ft9, a1
+;   mv t1, zero
+;   fmv.w.x fa6, t1
+;   fmv.x.w a1, fa6
+;   not a3, a1
+;   fmv.w.x ft1, a3
+;   fmv.x.w t1, ft1
+;   fmv.x.w a0, ft1
+;   or a2, t1, a0
+;   fmv.w.x fa2, a2
+;   mv t2, zero
+;   slli t6, t2, 0x20
+;   srli t6, t6, 0x20
+;   addi a2, zero, 1
+;   bltu t6, a2, 0xc
+;   auipc a2, 0
+;   jalr zero, a2, 0x28
+;   auipc a1, 0
+;   slli a2, t6, 3
+;   add a1, a1, a2
+;   jalr zero, a1, 0x10
+;   auipc a2, 0
+;   jalr zero, a2, 0xc
+; block1: ; offset 0x60
+;   j 8
+; block2: ; offset 0x64
+;   fmv.d fa2, ft9
+; block3: ; offset 0x68
+;   mv a0, zero
+;   ret