fix codegen riscv64 normalize_cmp_value. (#5873)

* fix issue5839

* add target.

* fix normalize_cmp_value.

* fix test failure.

* fix test failure.

* fix parameter type.

* Update cranelift/codegen/src/isa/riscv64/inst.isle

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Update cranelift/codegen/src/isa/riscv64/lower.isle

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* remove convert rule from IntCC to ExtendOp

---------

Co-authored-by: Jamey Sharp <jamey@minilop.net>
This commit is contained in:
yuyang
2023-03-01 07:00:23 +08:00
committed by GitHub
parent 0e9a48afd5
commit 32cfd60877
9 changed files with 109 additions and 66 deletions

View File

@@ -1910,21 +1910,20 @@
(decl lower_cond_br (IntCC ValueRegs VecMachLabel Type) Unit)
(extern constructor lower_cond_br lower_cond_br)
(decl intcc_to_extend_op (IntCC) ExtendOp)
(extern constructor intcc_to_extend_op intcc_to_extend_op)
;; Normalize a value for comparison.
;;
;; This ensures that types smaller than a register don't accidentally
;; pass undefined high bits when being compared as a full register.
(decl normalize_cmp_value (Type ValueRegs) ValueRegs)
(decl normalize_cmp_value (Type ValueRegs ExtendOp) ValueRegs)
(rule (normalize_cmp_value $I8 r)
(value_reg (alu_rr_imm12 (AluOPRRI.Andi) r (imm12_const 255))))
(rule (normalize_cmp_value $I16 r)
(value_reg (alu_rrr (AluOPRRR.And) r (imm $I16 65535))))
(rule (normalize_cmp_value $I32 r)
(value_reg (alu_rr_imm12 (AluOPRRI.Addiw) r (imm12_const 0))))
(rule 1 (normalize_cmp_value (fits_in_32 ity) r op)
(extend r op ity $I64))
(rule (normalize_cmp_value $I64 r) r)
(rule (normalize_cmp_value $I128 r) r)
(rule (normalize_cmp_value $I64 r _) r)
(rule (normalize_cmp_value $I128 r _) r)
;; Convert a truthy value, possibly of more than one register (an
;; I128), to one register. If narrower than 64 bits, must have already
@@ -1940,7 +1939,7 @@
;; Default behavior for branching based on an input value.
(rule
(lower_branch (brif v @ (value_type ty) _ _) targets)
(lower_cond_br (IntCC.NotEqual) (normalize_cmp_value ty v) targets ty))
(lower_cond_br (IntCC.NotEqual) (normalize_cmp_value ty v (ExtendOp.Zero)) targets ty))
;; Special case for SI128 to reify the comparison value and branch on it.
(rule 2
@@ -2118,7 +2117,7 @@
(rule
0
(lower_bmask (fits_in_64 _) (fits_in_64 in_ty) val)
(let ((input Reg (normalize_cmp_value in_ty val))
(let ((input Reg (normalize_cmp_value in_ty val (ExtendOp.Zero)))
(zero Reg (zero_reg))
(ones Reg (load_imm12 -1)))
(value_reg (gen_select_reg (IntCC.Equal) zero input zero ones))))

View File

@@ -143,7 +143,7 @@ mod tests {
assert_eq!(
format!("{:?}", fde),
"FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }"
"FrameDescriptionEntry { address: Constant(4321), length: 20, lsda: None, instructions: [] }"
);
}

View File

@@ -626,12 +626,12 @@
;;;;; Rules for `select`;;;;;;;;;
(rule
(lower (has_type ty (select c @ (value_type cty) x y)))
(gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c)) x y))
(gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c (ExtendOp.Zero))) x y))
(rule 1
(lower (has_type (fits_in_64 ty) (select (icmp cc a b @ (value_type in_ty)) x y)))
(let ((a Reg (normalize_cmp_value in_ty a))
(b Reg (normalize_cmp_value in_ty b)))
(let ((a Reg (normalize_cmp_value in_ty a (intcc_to_extend_op cc)))
(b Reg (normalize_cmp_value in_ty b (intcc_to_extend_op cc))))
(gen_select_reg cc a b x y)))
;;;;; Rules for `bitselect`;;;;;;;;;
@@ -851,7 +851,7 @@
(rule -1
(lower (has_type ty (select_spectre_guard c @ (value_type cty) x y)))
(gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c)) x y))
(gen_select ty (truthy_to_reg cty (normalize_cmp_value cty c (ExtendOp.Zero))) x y))
;;;;; Rules for `bmask`;;;;;;;;;
(rule

View File

@@ -3,7 +3,7 @@
// Pull in the ISLE generated code.
#[allow(unused)]
pub mod generated_code;
use generated_code::{Context, MInst};
use generated_code::{Context, ExtendOp, MInst};
// Types that the generated ISLE code uses via `use super::*`.
use super::{writable_zero_reg, zero_reg};
@@ -60,7 +60,22 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
_ => unreachable!(),
}
}
fn intcc_to_extend_op(&mut self, cc: &IntCC) -> ExtendOp {
use IntCC::*;
match *cc {
Equal
| NotEqual
| UnsignedLessThan
| UnsignedGreaterThanOrEqual
| UnsignedGreaterThan
| UnsignedLessThanOrEqual => ExtendOp::Zero,
SignedLessThan
| SignedGreaterThanOrEqual
| SignedGreaterThan
| SignedLessThanOrEqual => ExtendOp::Signed,
}
}
fn lower_cond_br(
&mut self,
cc: &IntCC,

View File

@@ -16,8 +16,9 @@ block2:
; VCode:
; block0:
; sext.w a2,a0
; bne a2,zero,taken(label1),not_taken(label2)
; slli a2,a0,32
; srli a4,a2,32
; bne a4,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
@@ -28,11 +29,12 @@ block2:
;
; Disassembled:
; block0: ; offset 0x0
; sext.w a2, a0
; bnez a2, 8
; block1: ; offset 0x8
; slli a2, a0, 0x20
; srli a4, a2, 0x20
; bnez a4, 8
; block1: ; offset 0xc
; addi a0, zero, 0x61
; block2: ; offset 0xc
; block2: ; offset 0x10
; ret
function %cold_annotation(i32) -> i32 {
@@ -49,8 +51,9 @@ block2 cold:
; VCode:
; block0:
; sext.w a2,a0
; bne a2,zero,taken(label1),not_taken(label2)
; slli a2,a0,32
; srli a4,a2,32
; bne a4,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block3:
@@ -61,11 +64,12 @@ block2 cold:
;
; Disassembled:
; block0: ; offset 0x0
; sext.w a2, a0
; beqz a2, 8
; block1: ; offset 0x8
; slli a2, a0, 0x20
; srli a4, a2, 0x20
; beqz a4, 8
; block1: ; offset 0xc
; ret
; block2: ; offset 0xc
; block2: ; offset 0x10
; addi a0, zero, 0x61
; j -8

View File

@@ -675,10 +675,9 @@ block1:
; VCode:
; block0:
; lui a1,16
; addi a1,a1,4095
; and a3,a0,a1
; bne a3,zero,taken(label1),not_taken(label2)
; slli t2,a0,48
; srli a1,t2,48
; bne a1,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
@@ -688,10 +687,9 @@ block1:
;
; Disassembled:
; block0: ; offset 0x0
; lui a1, 0x10
; addi a1, a1, -1
; and a3, a0, a1
; block1: ; offset 0xc
; slli t2, a0, 0x30
; srli a1, t2, 0x30
; block1: ; offset 0x8
; ret
function %i32_brif(i32){
@@ -705,8 +703,9 @@ block1:
; VCode:
; block0:
; sext.w t2,a0
; bne t2,zero,taken(label1),not_taken(label2)
; slli t2,a0,32
; srli a1,t2,32
; bne a1,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
@@ -716,8 +715,9 @@ block1:
;
; Disassembled:
; block0: ; offset 0x0
; sext.w t2, a0
; block1: ; offset 0x4
; slli t2, a0, 0x20
; srli a1, t2, 0x20
; block1: ; offset 0x8
; ret
function %i64_brif(i64){

View File

@@ -108,18 +108,22 @@ block0(v0: i32, v1: i8, v2: i8):
; VCode:
; block0:
; sext.w a3,a0
; li a4,42
; sext.w a5,a4
; select_reg a0,a1,a2##condition=(a3 eq a5)
; slli a3,a0,32
; srli a3,a3,32
; li a5,42
; slli a7,a5,32
; srli t4,a7,32
; select_reg a0,a1,a2##condition=(a3 eq t4)
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sext.w a3, a0
; addi a4, zero, 0x2a
; sext.w a5, a4
; beq a3, a5, 0xc
; slli a3, a0, 0x20
; srli a3, a3, 0x20
; addi a5, zero, 0x2a
; slli a7, a5, 0x20
; srli t4, a7, 0x20
; beq a3, t4, 0xc
; ori a0, a2, 0
; j 8
; ori a0, a1, 0

View File

@@ -150,17 +150,20 @@ block0(v0: i32):
; VCode:
; block0:
; sext.w t2,a0
; li a1,-1
; select_reg a1,zero,a1##condition=(zero eq t2)
; slli t2,a0,32
; srli a1,t2,32
; li a3,-1
; select_reg a1,zero,a3##condition=(zero eq a1)
; mv a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sext.w t2, a0
; addi a1, zero, -1
; beq zero, t2, 8
; slli t2, a0, 0x20
; srli a1, t2, 0x20
; addi a3, zero, -1
; beq zero, a1, 0xc
; ori a1, a3, 0
; j 8
; ori a1, zero, 0
; ori a0, a1, 0
@@ -174,22 +177,20 @@ block0(v0: i16):
; VCode:
; block0:
; lui a1,16
; addi a1,a1,4095
; and a3,a0,a1
; li a5,-1
; select_reg a1,zero,a5##condition=(zero eq a3)
; slli t2,a0,48
; srli a1,t2,48
; li a3,-1
; select_reg a1,zero,a3##condition=(zero eq a1)
; mv a0,a1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lui a1, 0x10
; addi a1, a1, -1
; and a3, a0, a1
; addi a5, zero, -1
; beq zero, a3, 0xc
; ori a1, a5, 0
; slli t2, a0, 0x30
; srli a1, t2, 0x30
; addi a3, zero, -1
; beq zero, a1, 0xc
; ori a1, a3, 0
; j 8
; ori a1, zero, 0
; ori a0, a1, 0

View File

@@ -0,0 +1,20 @@
test interpret
test run
set opt_level=speed
target aarch64
target s390x
target x86_64
target riscv64
function %a(i8, i8) -> i32 {
block0(v0: i8, v1: i8):
v2 = icmp sle v0, v1
v3 = uextend.i32 v2
v4 = iconst.i32 0
v5 = iconst.i32 1
v6 = icmp.i32 eq v3, v4 ; v4 = 0
v7 = select v6, v5, v3 ; v5 = 1
return v7
}
; run: %a(20, -11) == 1