fix codegen riscv64 normalize_cmp_value. (#5873)

* fix issue5839

* add target.

* fix normalize_cmp_value.

* fix test failure.

* fix test failure.

* fix parameter type.

* Update cranelift/codegen/src/isa/riscv64/inst.isle

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Update cranelift/codegen/src/isa/riscv64/lower.isle

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* remove convert rule from IntCC to ExtendOp

---------

Co-authored-by: Jamey Sharp <jamey@minilop.net>
This commit is contained in:
yuyang
2023-03-01 07:00:23 +08:00
committed by GitHub
parent 0e9a48afd5
commit 32cfd60877
9 changed files with 109 additions and 66 deletions

View File

@@ -1910,21 +1910,20 @@
(decl lower_cond_br (IntCC ValueRegs VecMachLabel Type) Unit) (decl lower_cond_br (IntCC ValueRegs VecMachLabel Type) Unit)
(extern constructor lower_cond_br lower_cond_br) (extern constructor lower_cond_br lower_cond_br)
(decl intcc_to_extend_op (IntCC) ExtendOp)
(extern constructor intcc_to_extend_op intcc_to_extend_op)
;; Normalize a value for comparison. ;; Normalize a value for comparison.
;; ;;
;; This ensures that types smaller than a register don't accidentally ;; This ensures that types smaller than a register don't accidentally
;; pass undefined high bits when being compared as a full register. ;; pass undefined high bits when being compared as a full register.
(decl normalize_cmp_value (Type ValueRegs) ValueRegs) (decl normalize_cmp_value (Type ValueRegs ExtendOp) ValueRegs)
(rule (normalize_cmp_value $I8 r) (rule 1 (normalize_cmp_value (fits_in_32 ity) r op)
(value_reg (alu_rr_imm12 (AluOPRRI.Andi) r (imm12_const 255)))) (extend r op ity $I64))
(rule (normalize_cmp_value $I16 r)
(value_reg (alu_rrr (AluOPRRR.And) r (imm $I16 65535))))
(rule (normalize_cmp_value $I32 r)
(value_reg (alu_rr_imm12 (AluOPRRI.Addiw) r (imm12_const 0))))
(rule (normalize_cmp_value $I64 r) r) (rule (normalize_cmp_value $I64 r _) r)
(rule (normalize_cmp_value $I128 r) r) (rule (normalize_cmp_value $I128 r _) r)
;; Convert a truthy value, possibly of more than one register (an ;; Convert a truthy value, possibly of more than one register (an
;; I128), to one register. If narrower than 64 bits, must have already ;; I128), to one register. If narrower than 64 bits, must have already
@@ -1940,7 +1939,7 @@
;; Default behavior for branching based on an input value. ;; Default behavior for branching based on an input value.
(rule (rule
(lower_branch (brif v @ (value_type ty) _ _) targets) (lower_branch (brif v @ (value_type ty) _ _) targets)
(lower_cond_br (IntCC.NotEqual) (normalize_cmp_value ty v) targets ty)) (lower_cond_br (IntCC.NotEqual) (normalize_cmp_value ty v (ExtendOp.Zero)) targets ty))
;; Special case for SI128 to reify the comparison value and branch on it. ;; Special case for SI128 to reify the comparison value and branch on it.
(rule 2 (rule 2
@@ -2118,7 +2117,7 @@
(rule (rule
0 0
(lower_bmask (fits_in_64 _) (fits_in_64 in_ty) val) (lower_bmask (fits_in_64 _) (fits_in_64 in_ty) val)
(let ((input Reg (normalize_cmp_value in_ty val)) (let ((input Reg (normalize_cmp_value in_ty val (ExtendOp.Zero)))
(zero Reg (zero_reg)) (zero Reg (zero_reg))
(ones Reg (load_imm12 -1))) (ones Reg (load_imm12 -1)))
(value_reg (gen_select_reg (IntCC.Equal) zero input zero ones)))) (value_reg (gen_select_reg (IntCC.Equal) zero input zero ones))))

View File

@@ -143,7 +143,7 @@ mod tests {
assert_eq!( assert_eq!(
format!("{:?}", fde), format!("{:?}", fde),
"FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }" "FrameDescriptionEntry { address: Constant(4321), length: 20, lsda: None, instructions: [] }"
); );
} }

View File

@@ -626,12 +626,12 @@
;;;;; Rules for `select`;;;;;;;;; ;;;;; Rules for `select`;;;;;;;;;
(rule (rule
(lower (has_type ty (select c @ (value_type cty) x y))) (lower (has_type ty (select c @ (value_type cty) x y)))
(gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c)) x y)) (gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c (ExtendOp.Zero))) x y))
(rule 1 (rule 1
(lower (has_type (fits_in_64 ty) (select (icmp cc a b @ (value_type in_ty)) x y))) (lower (has_type (fits_in_64 ty) (select (icmp cc a b @ (value_type in_ty)) x y)))
(let ((a Reg (normalize_cmp_value in_ty a)) (let ((a Reg (normalize_cmp_value in_ty a (intcc_to_extend_op cc)))
(b Reg (normalize_cmp_value in_ty b))) (b Reg (normalize_cmp_value in_ty b (intcc_to_extend_op cc))))
(gen_select_reg cc a b x y))) (gen_select_reg cc a b x y)))
;;;;; Rules for `bitselect`;;;;;;;;; ;;;;; Rules for `bitselect`;;;;;;;;;
@@ -851,7 +851,7 @@
(rule -1 (rule -1
(lower (has_type ty (select_spectre_guard c @ (value_type cty) x y))) (lower (has_type ty (select_spectre_guard c @ (value_type cty) x y)))
(gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c)) x y)) (gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c (ExtendOp.Zero))) x y))
;;;;; Rules for `bmask`;;;;;;;;; ;;;;; Rules for `bmask`;;;;;;;;;
(rule (rule

View File

@@ -3,7 +3,7 @@
// Pull in the ISLE generated code. // Pull in the ISLE generated code.
#[allow(unused)] #[allow(unused)]
pub mod generated_code; pub mod generated_code;
use generated_code::{Context, MInst}; use generated_code::{Context, ExtendOp, MInst};
// Types that the generated ISLE code uses via `use super::*`. // Types that the generated ISLE code uses via `use super::*`.
use super::{writable_zero_reg, zero_reg}; use super::{writable_zero_reg, zero_reg};
@@ -60,7 +60,22 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
_ => unreachable!(), _ => unreachable!(),
} }
} }
fn intcc_to_extend_op(&mut self, cc: &IntCC) -> ExtendOp {
use IntCC::*;
match *cc {
Equal
| NotEqual
| UnsignedLessThan
| UnsignedGreaterThanOrEqual
| UnsignedGreaterThan
| UnsignedLessThanOrEqual => ExtendOp::Zero,
SignedLessThan
| SignedGreaterThanOrEqual
| SignedGreaterThan
| SignedLessThanOrEqual => ExtendOp::Signed,
}
}
fn lower_cond_br( fn lower_cond_br(
&mut self, &mut self,
cc: &IntCC, cc: &IntCC,

View File

@@ -16,8 +16,9 @@ block2:
; VCode: ; VCode:
; block0: ; block0:
; sext.w a2,a0 ; slli a2,a0,32
; bne a2,zero,taken(label1),not_taken(label2) ; srli a4,a2,32
; bne a4,zero,taken(label1),not_taken(label2)
; block1: ; block1:
; j label3 ; j label3
; block2: ; block2:
@@ -28,11 +29,12 @@ block2:
; ;
; Disassembled: ; Disassembled:
; block0: ; offset 0x0 ; block0: ; offset 0x0
; sext.w a2, a0 ; slli a2, a0, 0x20
; bnez a2, 8 ; srli a4, a2, 0x20
; block1: ; offset 0x8 ; bnez a4, 8
; block1: ; offset 0xc
; addi a0, zero, 0x61 ; addi a0, zero, 0x61
; block2: ; offset 0xc ; block2: ; offset 0x10
; ret ; ret
function %cold_annotation(i32) -> i32 { function %cold_annotation(i32) -> i32 {
@@ -49,8 +51,9 @@ block2 cold:
; VCode: ; VCode:
; block0: ; block0:
; sext.w a2,a0 ; slli a2,a0,32
; bne a2,zero,taken(label1),not_taken(label2) ; srli a4,a2,32
; bne a4,zero,taken(label1),not_taken(label2)
; block1: ; block1:
; j label3 ; j label3
; block3: ; block3:
@@ -61,11 +64,12 @@ block2 cold:
; ;
; Disassembled: ; Disassembled:
; block0: ; offset 0x0 ; block0: ; offset 0x0
; sext.w a2, a0 ; slli a2, a0, 0x20
; beqz a2, 8 ; srli a4, a2, 0x20
; block1: ; offset 0x8 ; beqz a4, 8
; block1: ; offset 0xc
; ret ; ret
; block2: ; offset 0xc ; block2: ; offset 0x10
; addi a0, zero, 0x61 ; addi a0, zero, 0x61
; j -8 ; j -8

View File

@@ -675,10 +675,9 @@ block1:
; VCode: ; VCode:
; block0: ; block0:
; lui a1,16 ; slli t2,a0,48
; addi a1,a1,4095 ; srli a1,t2,48
; and a3,a0,a1 ; bne a1,zero,taken(label1),not_taken(label2)
; bne a3,zero,taken(label1),not_taken(label2)
; block1: ; block1:
; j label3 ; j label3
; block2: ; block2:
@@ -688,10 +687,9 @@ block1:
; ;
; Disassembled: ; Disassembled:
; block0: ; offset 0x0 ; block0: ; offset 0x0
; lui a1, 0x10 ; slli t2, a0, 0x30
; addi a1, a1, -1 ; srli a1, t2, 0x30
; and a3, a0, a1 ; block1: ; offset 0x8
; block1: ; offset 0xc
; ret ; ret
function %i32_brif(i32){ function %i32_brif(i32){
@@ -705,8 +703,9 @@ block1:
; VCode: ; VCode:
; block0: ; block0:
; sext.w t2,a0 ; slli t2,a0,32
; bne t2,zero,taken(label1),not_taken(label2) ; srli a1,t2,32
; bne a1,zero,taken(label1),not_taken(label2)
; block1: ; block1:
; j label3 ; j label3
; block2: ; block2:
@@ -716,8 +715,9 @@ block1:
; ;
; Disassembled: ; Disassembled:
; block0: ; offset 0x0 ; block0: ; offset 0x0
; sext.w t2, a0 ; slli t2, a0, 0x20
; block1: ; offset 0x4 ; srli a1, t2, 0x20
; block1: ; offset 0x8
; ret ; ret
function %i64_brif(i64){ function %i64_brif(i64){

View File

@@ -108,18 +108,22 @@ block0(v0: i32, v1: i8, v2: i8):
; VCode: ; VCode:
; block0: ; block0:
; sext.w a3,a0 ; slli a3,a0,32
; li a4,42 ; srli a3,a3,32
; sext.w a5,a4 ; li a5,42
; select_reg a0,a1,a2##condition=(a3 eq a5) ; slli a7,a5,32
; srli t4,a7,32
; select_reg a0,a1,a2##condition=(a3 eq t4)
; ret ; ret
; ;
; Disassembled: ; Disassembled:
; block0: ; offset 0x0 ; block0: ; offset 0x0
; sext.w a3, a0 ; slli a3, a0, 0x20
; addi a4, zero, 0x2a ; srli a3, a3, 0x20
; sext.w a5, a4 ; addi a5, zero, 0x2a
; beq a3, a5, 0xc ; slli a7, a5, 0x20
; srli t4, a7, 0x20
; beq a3, t4, 0xc
; ori a0, a2, 0 ; ori a0, a2, 0
; j 8 ; j 8
; ori a0, a1, 0 ; ori a0, a1, 0

View File

@@ -150,17 +150,20 @@ block0(v0: i32):
; VCode: ; VCode:
; block0: ; block0:
; sext.w t2,a0 ; slli t2,a0,32
; li a1,-1 ; srli a1,t2,32
; select_reg a1,zero,a1##condition=(zero eq t2) ; li a3,-1
; select_reg a1,zero,a3##condition=(zero eq a1)
; mv a0,a1 ; mv a0,a1
; ret ; ret
; ;
; Disassembled: ; Disassembled:
; block0: ; offset 0x0 ; block0: ; offset 0x0
; sext.w t2, a0 ; slli t2, a0, 0x20
; addi a1, zero, -1 ; srli a1, t2, 0x20
; beq zero, t2, 8 ; addi a3, zero, -1
; beq zero, a1, 0xc
; ori a1, a3, 0
; j 8 ; j 8
; ori a1, zero, 0 ; ori a1, zero, 0
; ori a0, a1, 0 ; ori a0, a1, 0
@@ -174,22 +177,20 @@ block0(v0: i16):
; VCode: ; VCode:
; block0: ; block0:
; lui a1,16 ; slli t2,a0,48
; addi a1,a1,4095 ; srli a1,t2,48
; and a3,a0,a1 ; li a3,-1
; li a5,-1 ; select_reg a1,zero,a3##condition=(zero eq a1)
; select_reg a1,zero,a5##condition=(zero eq a3)
; mv a0,a1 ; mv a0,a1
; ret ; ret
; ;
; Disassembled: ; Disassembled:
; block0: ; offset 0x0 ; block0: ; offset 0x0
; lui a1, 0x10 ; slli t2, a0, 0x30
; addi a1, a1, -1 ; srli a1, t2, 0x30
; and a3, a0, a1 ; addi a3, zero, -1
; addi a5, zero, -1 ; beq zero, a1, 0xc
; beq zero, a3, 0xc ; ori a1, a3, 0
; ori a1, a5, 0
; j 8 ; j 8
; ori a1, zero, 0 ; ori a1, zero, 0
; ori a0, a1, 0 ; ori a0, a1, 0

View File

@@ -0,0 +1,20 @@
test interpret
test run
set opt_level=speed
target aarch64
target s390x
target x86_64
target riscv64
; Compares two i8 values with a signed `sle`, widens the comparison result
; to i32, and then selects on whether that widened result equals zero.
;
; Assuming `icmp` produces only 0 or 1, the function returns 1 whichever way
; the comparison goes: if v3 is 0 the select yields v5 (= 1), otherwise it
; yields v3 itself (= 1). Any other return value therefore means a backend
; did not compare or extend the narrow values correctly.
function %a(i8, i8) -> i32 {
block0(v0: i8, v1: i8):
v2 = icmp sle v0, v1
; Zero-extend the comparison result so it can be compared as an i32.
v3 = uextend.i32 v2
v4 = iconst.i32 0
v5 = iconst.i32 1
; The select below consumes this i32 `icmp` directly as its condition.
v6 = icmp.i32 eq v3, v4 ; v4 = 0
v7 = select v6, v5, v3 ; v5 = 1
return v7
}
; run: %a(20, -11) == 1