riscv64: Add Zba extension instructions (#6087)
* riscv64: Use `add.uw` to zero extend * riscv64: Implement `add.uw` optimizations * riscv64: Add `Zba` `iadd+ishl` optimizations * riscv64: Add `shl+uextend` optimizations based on `Zba` * riscv64: Fix some issues with `Zba` instructions * riscv64: Restrict shnadd selection * riscv64: Fix `extend` priorities * riscv64: Remove redundant `addw` rule * riscv64: Specify type for `add` extend rules * riscv64: Use `u64_from_imm64` extractor instead of `uimm8` * riscv64: Restrict `uextend` in `shnadd.uw` rules * riscv64: Use concrete type in `slli.uw` rule * riscv64: Add extra arithmetic extends tests Co-authored-by: Jamey Sharp <jsharp@fastly.com> * riscv64: Make `Adduw` types concrete * riscv64: Add extra arithmetic extend tests * riscv64: Add `sextend`+Arithmetic rules * riscv64: Fix whitespace * cranelift: Move arithmetic extends tests with i128 to separate file --------- Co-authored-by: Jamey Sharp <jsharp@fastly.com>
This commit is contained in:
@@ -1157,9 +1157,15 @@
|
||||
(let ((val Reg (value_regs_get val 0)))
|
||||
(alu_rr_imm12 (AluOPRRI.Zexth) val (imm12_const 0))))
|
||||
|
||||
;; With `Zba` we have a `zext.w` instruction
|
||||
(rule 2 (extend val (ExtendOp.Zero) $I32 $I64)
|
||||
(if-let $true (has_zba))
|
||||
(let ((val Reg (value_regs_get val 0)))
|
||||
(alu_rrr (AluOPRRR.Adduw) val (zero_reg))))
|
||||
|
||||
;;; Signed rules extending to I128
|
||||
;; Extend the bottom part, and extract the sign bit from the bottom as the top
|
||||
(rule 2 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128)
|
||||
(rule 3 (extend val (ExtendOp.Signed) (fits_in_64 from_ty) $I128)
|
||||
(let ((val Reg (value_regs_get val 0))
|
||||
(low Reg (extend val (ExtendOp.Signed) from_ty $I64))
|
||||
(high Reg (alu_rr_imm12 (AluOPRRI.Srai) low (imm12_const 63))))
|
||||
|
||||
@@ -248,7 +248,6 @@ fn test_riscv64_binemit() {
|
||||
0x28755593,
|
||||
));
|
||||
|
||||
//
|
||||
insns.push(TestUnit::new(
|
||||
Inst::AluRRR {
|
||||
alu_op: AluOPRRR::Adduw,
|
||||
@@ -256,10 +255,21 @@ fn test_riscv64_binemit() {
|
||||
rs1: a0(),
|
||||
rs2: zero_reg(),
|
||||
},
|
||||
"add.uw a1,a0,zero",
|
||||
"zext.w a1,a0",
|
||||
0x80505bb,
|
||||
));
|
||||
|
||||
insns.push(TestUnit::new(
|
||||
Inst::AluRRR {
|
||||
alu_op: AluOPRRR::Adduw,
|
||||
rd: writable_a1(),
|
||||
rs1: a0(),
|
||||
rs2: a1(),
|
||||
},
|
||||
"add.uw a1,a0,a1",
|
||||
0x08b505bb,
|
||||
));
|
||||
|
||||
insns.push(TestUnit::new(
|
||||
Inst::AluRRR {
|
||||
alu_op: AluOPRRR::Andn,
|
||||
|
||||
@@ -1220,10 +1220,17 @@ impl Inst {
|
||||
rs1,
|
||||
rs2,
|
||||
} => {
|
||||
let rs1 = format_reg(rs1, allocs);
|
||||
let rs2 = format_reg(rs2, allocs);
|
||||
let rd = format_reg(rd.to_reg(), allocs);
|
||||
format!("{} {},{},{}", alu_op.op_name(), rd, rs1, rs2,)
|
||||
let rs1_s = format_reg(rs1, allocs);
|
||||
let rs2_s = format_reg(rs2, allocs);
|
||||
let rd_s = format_reg(rd.to_reg(), allocs);
|
||||
match alu_op {
|
||||
AluOPRRR::Adduw if rs2 == zero_reg() => {
|
||||
format!("zext.w {},{}", rd_s, rs1_s)
|
||||
}
|
||||
_ => {
|
||||
format!("{} {},{},{}", alu_op.op_name(), rd_s, rs1_s, rs2_s)
|
||||
}
|
||||
}
|
||||
}
|
||||
&Inst::FpuRR {
|
||||
frm,
|
||||
|
||||
@@ -26,11 +26,9 @@
|
||||
|
||||
|
||||
;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule -1 (lower (has_type (fits_in_32 ty) (iadd x y)))
|
||||
(alu_rrr (AluOPRRR.Addw) x y))
|
||||
|
||||
;; Base case, simply adding things in registers.
|
||||
(rule -2 (lower (has_type (fits_in_64 ty) (iadd x y)))
|
||||
(rule 0 (lower (has_type (fits_in_64 ty) (iadd x y)))
|
||||
(alu_add x y))
|
||||
|
||||
;; Special cases for when one operand is an immediate that fits in 12 bits.
|
||||
@@ -40,17 +38,63 @@
|
||||
(rule 2 (lower (has_type (fits_in_64 ty) (iadd (imm12_from_value x) y)))
|
||||
(alu_rr_imm12 (select_addi ty) y x))
|
||||
|
||||
(rule
|
||||
(lower (has_type $I128 (iadd x y)))
|
||||
(let
|
||||
( ;; low part.
|
||||
(low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0)))
|
||||
;; compute carry.
|
||||
(carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0)))
|
||||
;;
|
||||
(high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1)))
|
||||
;; add carry.
|
||||
(high Reg (alu_add high_tmp carry)))
|
||||
;; Special case when one of the operands is uextended
|
||||
;; Needs `Zba`
|
||||
(rule 3 (lower (has_type $I64 (iadd x (uextend y @ (value_type $I32)))))
|
||||
(if-let $true (has_zba))
|
||||
(alu_rrr (AluOPRRR.Adduw) y x))
|
||||
|
||||
(rule 4 (lower (has_type $I64 (iadd (uextend x @ (value_type $I32)) y)))
|
||||
(if-let $true (has_zba))
|
||||
(alu_rrr (AluOPRRR.Adduw) x y))
|
||||
|
||||
;; Add with const shift. We have a few of these instructions with `Zba`.
|
||||
(decl pure partial match_shnadd (Imm64) AluOPRRR)
|
||||
(rule (match_shnadd (u64_from_imm64 1)) (AluOPRRR.Sh1add))
|
||||
(rule (match_shnadd (u64_from_imm64 2)) (AluOPRRR.Sh2add))
|
||||
(rule (match_shnadd (u64_from_imm64 3)) (AluOPRRR.Sh3add))
|
||||
|
||||
(rule 3 (lower (has_type $I64 (iadd x (ishl y (maybe_uextend (iconst n))))))
|
||||
(if-let $true (has_zba))
|
||||
(if-let shnadd (match_shnadd n))
|
||||
(alu_rrr shnadd y x))
|
||||
|
||||
(rule 4 (lower (has_type $I64 (iadd (ishl x (maybe_uextend (iconst n))) y)))
|
||||
(if-let $true (has_zba))
|
||||
(if-let shnadd (match_shnadd n))
|
||||
(alu_rrr shnadd x y))
|
||||
|
||||
|
||||
;; Add with uextended const shift. We have a few of these instructions with `Zba`.
|
||||
;;
|
||||
;; !!! Important !!!
|
||||
;; These rules only work for (ishl (uextend _) _) and not for (uextend (ishl _ _))!
|
||||
;; Getting this wrong means a potential miscalculation of the shift amount.
|
||||
;; Additionally, we can only ensure that this is correct if the uextend is from 32 to 64 bits.
|
||||
(decl pure partial match_shnadd_uw (Imm64) AluOPRRR)
|
||||
(rule (match_shnadd_uw (u64_from_imm64 1)) (AluOPRRR.Sh1adduw))
|
||||
(rule (match_shnadd_uw (u64_from_imm64 2)) (AluOPRRR.Sh2adduw))
|
||||
(rule (match_shnadd_uw (u64_from_imm64 3)) (AluOPRRR.Sh3adduw))
|
||||
|
||||
(rule 5 (lower (has_type $I64 (iadd x (ishl (uextend y @ (value_type $I32)) (maybe_uextend (iconst n))))))
|
||||
(if-let $true (has_zba))
|
||||
(if-let shnadd_uw (match_shnadd_uw n))
|
||||
(alu_rrr shnadd_uw y x))
|
||||
|
||||
(rule 6 (lower (has_type $I64 (iadd (ishl (uextend x @ (value_type $I32)) (maybe_uextend (iconst n))) y)))
|
||||
(if-let $true (has_zba))
|
||||
(if-let shnadd_uw (match_shnadd_uw n))
|
||||
(alu_rrr shnadd_uw x y))
|
||||
|
||||
;; I128 cases
|
||||
(rule 7 (lower (has_type $I128 (iadd x y)))
|
||||
(let ((low Reg (alu_add (value_regs_get x 0) (value_regs_get y 0)))
|
||||
;; compute carry.
|
||||
(carry Reg (alu_rrr (AluOPRRR.SltU) low (value_regs_get y 0)))
|
||||
;;
|
||||
(high_tmp Reg (alu_add (value_regs_get x 1) (value_regs_get y 1)))
|
||||
;; add carry.
|
||||
(high Reg (alu_add high_tmp carry)))
|
||||
(value_regs low high)))
|
||||
|
||||
;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;
|
||||
@@ -355,6 +399,38 @@
|
||||
(rule (lower (has_type out_ty (sextend val @ (value_type in_ty))))
|
||||
(sext val in_ty out_ty))
|
||||
|
||||
;; The instructions below are present in RV64I and sign-extend the result to 64 bits.
|
||||
|
||||
(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (iadd x y)))))
|
||||
(alu_rrr (AluOPRRR.Addw) x y))
|
||||
|
||||
(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (isub x y)))))
|
||||
(alu_rrr (AluOPRRR.Subw) x y))
|
||||
|
||||
(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ishl x y)))))
|
||||
(alu_rrr (AluOPRRR.Sllw) x (value_regs_get y 0)))
|
||||
|
||||
(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (ushr x y)))))
|
||||
(alu_rrr (AluOPRRR.Srlw) x (value_regs_get y 0)))
|
||||
|
||||
(rule 1 (lower (has_type $I64 (sextend (has_type $I32 (sshr x y)))))
|
||||
(alu_rrr (AluOPRRR.Sraw) x (value_regs_get y 0)))
|
||||
|
||||
|
||||
(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (iadd x (imm12_from_value y))))))
|
||||
(alu_rr_imm12 (AluOPRRI.Addiw) x y))
|
||||
|
||||
(rule 3 (lower (has_type $I64 (sextend (has_type $I32 (iadd (imm12_from_value x) y)))))
|
||||
(alu_rr_imm12 (AluOPRRI.Addiw) y x))
|
||||
|
||||
(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ishl x (imm12_from_value y))))))
|
||||
(alu_rr_imm12 (AluOPRRI.Slliw) x y))
|
||||
|
||||
(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (ushr x (imm12_from_value y))))))
|
||||
(alu_rr_imm12 (AluOPRRI.SrliW) x y))
|
||||
|
||||
(rule 2 (lower (has_type $I64 (sextend (has_type $I32 (sshr x (imm12_from_value y))))))
|
||||
(alu_rr_imm12 (AluOPRRI.Sraiw) x y))
|
||||
|
||||
;;;; Rules for `popcnt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
(rule (lower (has_type (fits_in_64 ty) (popcnt x)))
|
||||
@@ -385,6 +461,12 @@
|
||||
(rule 1 (lower (has_type $I64 (ishl x y)))
|
||||
(alu_rrr (AluOPRRR.Sll) x (value_regs_get y 0)))
|
||||
|
||||
;; With `Zba` we have a shift that zero extends the LHS argument.
|
||||
(rule 3 (lower (has_type $I64 (ishl (uextend x @ (value_type $I32)) (maybe_uextend (imm12_from_value y)))))
|
||||
(if-let $true (has_zba))
|
||||
(alu_rr_imm12 (AluOPRRI.SlliUw) x y))
|
||||
|
||||
;; I128 cases
|
||||
(rule 0 (lower (has_type $I128 (ishl x y)))
|
||||
(lower_i128_ishl x y))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user