From cb3b6c621f1238284729475aeb66b7284fc9b3f1 Mon Sep 17 00:00:00 2001 From: yuyang <96557710+yuyang-ok@users.noreply.github.com> Date: Wed, 1 Feb 2023 11:44:13 +0800 Subject: [PATCH] fix rotl.i16 with i128 shift value. (#5611) * fix issue 5523. * fix. * add missing issue file. * fix issue. * fix duplicate shamt_128. * issue 5523 add test target,and fix some wrong comment. * fix output file. * enable llvm_abi_extensions for regression test file. --- cranelift/codegen/src/isa/riscv64/inst.isle | 43 +++--- .../codegen/src/isa/riscv64/lower/isle.rs | 5 +- .../filetests/isa/riscv64/bitops.clif | 129 +++++++++--------- .../filetests/isa/riscv64/shift-rotate.clif | 44 +++--- .../filetests/runtests/issue5523.clif | 15 ++ 5 files changed, 133 insertions(+), 103 deletions(-) create mode 100644 cranelift/filetests/filetests/runtests/issue5523.clif diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index cf7ce9570d..9a7e5faf9e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -1125,8 +1125,9 @@ (alu_rrr (AluOPRRR.Or) part1 part3))) -;;;; construct shift amount +;;;; construct shift amount. rotl on i128 is implemented with shifts, so it can call this function too. ;;;; this will return shift amount and (ty_bits - "shift amount") +;;;; if ty_bits is greater than 64 (like i128), then shamt is computed as if ty_bits were 64, because we are a 64-bit platform. (decl gen_shamt (Type Reg) ValueRegs) (extern constructor gen_shamt gen_shamt) @@ -1243,12 +1244,13 @@ (high_part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) high_part2)) (high Reg (alu_rrr (AluOPRRR.Or) high_part1 high_part3)) ;; - (const64 Reg (load_u64_constant 64))) + (const64 Reg (load_u64_constant 64)) + (shamt_128 Reg (alu_andi (value_regs_get y 0) 127))) ;; right now we only rotate less than 64 bits. - ;; if shamt is greater than 64 , we should switch low and high.
+ ;; if shamt is greater than or equal 64 , we should switch low and high. (value_regs - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high low) - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 low high) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high) ))) @@ -1272,12 +1274,13 @@ (high Reg (alu_rrr (AluOPRRR.Or) high_part1 high_part3)) ;; - (const64 Reg (load_u64_constant 64))) + (const64 Reg (load_u64_constant 64)) + (shamt_128 Reg (alu_andi (value_regs_get y 0) 127))) ;; right now we only rotate less than 64 bits. - ;; if shamt is greater than 64 , we should switch low and high. + ;; if shamt is greater than or equal 64 , we should switch low and high. (value_regs - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high low) - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 low high) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high) ))) @@ -1297,10 +1300,11 @@ (high_part3 Reg (alu_rrr (AluOPRRR.Sll) (value_regs_get x 1) shamt)) (high Reg (alu_rrr (AluOPRRR.Or) high_part2 high_part3 )) ;; - (const64 Reg (load_u64_constant 64))) + (const64 Reg (load_u64_constant 64)) + (shamt_128 Reg (alu_andi (value_regs_get y 0) 127))) (value_regs - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 (zero_reg) low) - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 low high)))) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 low high)))) (decl lower_i128_ushr (ValueRegs ValueRegs) ValueRegs) (rule @@ -1320,10 +1324,11 @@ (const64 Reg (load_u64_constant 64)) ;; - (high Reg (alu_rrr (AluOPRRR.Srl) (value_regs_get x 1) shamt))) + (high Reg (alu_rrr 
(AluOPRRR.Srl) (value_regs_get x 1) shamt)) + (shamt_128 Reg (alu_andi (value_regs_get y 0) 127))) (value_regs - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high low) - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 (zero_reg) high)))) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 (zero_reg) high)))) (decl lower_i128_sshr (ValueRegs ValueRegs) ValueRegs) @@ -1347,10 +1352,12 @@ ;; (const_neg_1 Reg (load_imm12 -1)) ;; - (high_replacement Reg (gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) const_neg_1 (zero_reg)))) + (high_replacement Reg (gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) const_neg_1 (zero_reg))) + (const64 Reg (load_u64_constant 64)) + (shamt_128 Reg (alu_andi (value_regs_get y 0) 127))) (value_regs - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high low) - (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt const64 high_replacement high)))) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high low) + (gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high_replacement high)))) (decl load_imm12 (i32) Reg) (rule diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index d0291e2191..343992cd85 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -253,19 +253,20 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> { // fn gen_shamt(&mut self, ty: Type, shamt: Reg) -> ValueRegs { + let ty_bits = if ty.bits() > 64 { 64 } else { ty.bits() }; let shamt = { let tmp = self.temp_writable_reg(I64); self.emit(&MInst::AluRRImm12 { alu_op: AluOPRRI::Andi, rd: tmp, rs: shamt, - imm12: Imm12::from_bits((ty.bits() - 1) as i16), + imm12: Imm12::from_bits((ty_bits - 1) as 
i16), }); tmp.to_reg() }; let len_sub_shamt = { let tmp = self.temp_writable_reg(I64); - self.emit(&MInst::load_imm12(tmp, Imm12::from_bits(ty.bits() as i16))); + self.emit(&MInst::load_imm12(tmp, Imm12::from_bits(ty_bits as i16))); let len_sub_shamt = self.temp_writable_reg(I64); self.emit(&MInst::AluRRR { alu_op: AluOPRRR::Sub, diff --git a/cranelift/filetests/filetests/isa/riscv64/bitops.clif b/cranelift/filetests/filetests/isa/riscv64/bitops.clif index c4d0575666..48697c3a0b 100644 --- a/cranelift/filetests/filetests/isa/riscv64/bitops.clif +++ b/cranelift/filetests/filetests/isa/riscv64/bitops.clif @@ -795,19 +795,18 @@ block0(v0: i128, v1: i8): } ; block0: -; mv t2,a1 -; andi a1,a2,127 -; li a3,128 -; sub a5,a3,a1 -; sll a7,a0,a1 -; srl t4,a0,a5 -; select_reg t1,zero,t4##condition=(a1 eq zero) -; mv a5,t2 -; sll a0,a5,a1 -; or a2,t1,a0 +; andi a3,a2,63 ; li a4,64 -; select_reg a0,zero,a7##condition=(a1 uge a4) -; select_reg a1,a7,a2##condition=(a1 uge a4) +; sub a5,a4,a3 +; sll a7,a0,a3 +; srl t4,a0,a5 +; select_reg t1,zero,t4##condition=(a3 eq zero) +; sll a0,a1,a3 +; or a3,t1,a0 +; li a4,64 +; andi a6,a2,127 +; select_reg a0,zero,a7##condition=(a6 uge a4) +; select_reg a1,a7,a3##condition=(a6 uge a4) ; ret function %ishl_i128_i128(i128, i128) -> i128 { @@ -817,17 +816,18 @@ block0(v0: i128, v1: i128): } ; block0: -; andi a2,a2,127 -; li a4,128 -; sub a6,a4,a2 -; sll t3,a0,a2 +; andi a3,a2,63 +; li a4,64 +; sub a6,a4,a3 +; sll t3,a0,a3 ; srl t0,a0,a6 -; select_reg t2,zero,t0##condition=(a2 eq zero) -; sll a1,a1,a2 +; select_reg t2,zero,t0##condition=(a3 eq zero) +; sll a1,a1,a3 ; or a3,t2,a1 ; li a5,64 -; select_reg a0,zero,t3##condition=(a2 uge a5) -; select_reg a1,t3,a3##condition=(a2 uge a5) +; andi a7,a2,127 +; select_reg a0,zero,t3##condition=(a7 uge a5) +; select_reg a1,t3,a3##condition=(a7 uge a5) ; ret function %ushr_i128_i8(i128, i8) -> i128 { @@ -837,19 +837,18 @@ block0(v0: i128, v1: i8): } ; block0: -; mv t2,a1 -; andi a1,a2,127 -; li a3,128 -; 
sub a5,a3,a1 -; mv a2,t2 -; sll a7,a2,a5 -; select_reg t4,zero,a7##condition=(a1 eq zero) -; srl t1,a0,a1 +; andi a3,a2,63 +; li a4,64 +; sub a5,a4,a3 +; sll a7,a1,a5 +; select_reg t4,zero,a7##condition=(a3 eq zero) +; srl t1,a0,a3 ; or a0,t4,t1 -; li a3,64 -; srl a4,a2,a1 -; select_reg a0,a4,a0##condition=(a1 uge a3) -; select_reg a1,zero,a4##condition=(a1 uge a3) +; li a4,64 +; srl a5,a1,a3 +; andi a6,a2,127 +; select_reg a0,a5,a0##condition=(a6 uge a4) +; select_reg a1,zero,a5##condition=(a6 uge a4) ; ret function %ushr_i128_i128(i128, i128) -> i128 { @@ -859,17 +858,18 @@ block0(v0: i128, v1: i128): } ; block0: -; andi a2,a2,127 -; li a4,128 -; sub a6,a4,a2 +; andi a3,a2,63 +; li a4,64 +; sub a6,a4,a3 ; sll t3,a1,a6 -; select_reg t0,zero,t3##condition=(a2 eq zero) -; srl t2,a0,a2 -; or a4,t0,t2 -; li a3,64 -; srl a5,a1,a2 -; select_reg a0,a5,a4##condition=(a2 uge a3) -; select_reg a1,zero,a5##condition=(a2 uge a3) +; select_reg t0,zero,t3##condition=(a3 eq zero) +; srl t2,a0,a3 +; or a5,t0,t2 +; li a4,64 +; srl a6,a1,a3 +; andi a7,a2,127 +; select_reg a0,a6,a5##condition=(a7 uge a4) +; select_reg a1,zero,a6##condition=(a7 uge a4) ; ret function %sshr_i128_i8(i128, i8) -> i128 { @@ -879,20 +879,21 @@ block0(v0: i128, v1: i8): } ; block0: -; mv a4,a1 -; andi a1,a2,127 -; li a3,128 -; sub a5,a3,a1 -; sll a7,a4,a5 -; select_reg t4,zero,a7##condition=(a1 eq zero) -; srl t1,a0,a1 +; andi a3,a2,63 +; li a4,64 +; sub a5,a4,a3 +; sll a7,a1,a5 +; select_reg t4,zero,a7##condition=(a3 eq zero) +; srl t1,a0,a3 ; or a0,t4,t1 -; li a2,64 -; sra a5,a4,a1 +; li a4,64 +; sra a4,a1,a3 ; li a6,-1 -; select_reg t3,a6,zero##condition=(a4 slt zero) -; select_reg a0,a5,a0##condition=(a1 uge a2) -; select_reg a1,t3,a5##condition=(a1 uge a2) +; select_reg t3,a6,zero##condition=(a1 slt zero) +; li t0,64 +; andi t2,a2,127 +; select_reg a0,a4,a0##condition=(t2 uge t0) +; select_reg a1,t3,a4##condition=(t2 uge t0) ; ret function %sshr_i128_i128(i128, i128) -> i128 { @@ -902,18 +903,20 @@ 
block0(v0: i128, v1: i128): } ; block0: -; andi a2,a2,127 -; li a4,128 -; sub a6,a4,a2 +; andi a3,a2,63 +; li a4,64 +; sub a6,a4,a3 ; sll t3,a1,a6 -; select_reg t0,zero,t3##condition=(a2 eq zero) -; srl t2,a0,a2 +; select_reg t0,zero,t3##condition=(a3 eq zero) +; srl t2,a0,a3 ; or a4,t0,t2 -; li a3,64 -; sra a5,a1,a2 +; li a5,64 +; sra a5,a1,a3 ; li a7,-1 ; select_reg t4,a7,zero##condition=(a1 slt zero) -; select_reg a0,a5,a4##condition=(a2 uge a3) -; select_reg a1,t4,a5##condition=(a2 uge a3) +; li t1,64 +; andi a1,a2,127 +; select_reg a0,a5,a4##condition=(a1 uge t1) +; select_reg a1,t4,a5##condition=(a1 uge t1) ; ret diff --git a/cranelift/filetests/filetests/isa/riscv64/shift-rotate.clif b/cranelift/filetests/filetests/isa/riscv64/shift-rotate.clif index e1e4839999..d54feeea36 100644 --- a/cranelift/filetests/filetests/isa/riscv64/shift-rotate.clif +++ b/cranelift/filetests/filetests/isa/riscv64/shift-rotate.clif @@ -13,20 +13,22 @@ block0(v0: i128, v1: i128): } ; block0: -; andi a2,a2,127 -; li a4,128 -; sub a6,a4,a2 -; srl t3,a0,a2 +; andi a3,a2,63 +; li a4,64 +; sub a6,a4,a3 +; srl t3,a0,a3 ; sll t0,a1,a6 -; select_reg t2,zero,t0##condition=(a2 eq zero) -; or a3,t3,t2 -; srl a4,a1,a2 +; mv t1,a1 +; select_reg t2,zero,t0##condition=(a3 eq zero) +; or a1,t3,t2 +; srl a4,t1,a3 ; sll a5,a0,a6 -; select_reg a7,zero,a5##condition=(a2 eq zero) +; select_reg a7,zero,a5##condition=(a3 eq zero) ; or t4,a4,a7 ; li t1,64 -; select_reg a0,t4,a3##condition=(a2 uge t1) -; select_reg a1,a3,t4##condition=(a2 uge t1) +; andi a2,a2,127 +; select_reg a0,t4,a1##condition=(a2 uge t1) +; select_reg a1,a1,t4##condition=(a2 uge t1) ; ret function %f0(i64, i64) -> i64 { @@ -105,20 +107,22 @@ block0(v0: i128, v1: i128): } ; block0: -; andi a2,a2,127 -; li a4,128 -; sub a6,a4,a2 -; sll t3,a0,a2 +; andi a3,a2,63 +; li a4,64 +; sub a6,a4,a3 +; sll t3,a0,a3 ; srl t0,a1,a6 -; select_reg t2,zero,t0##condition=(a2 eq zero) -; or a3,t3,t2 -; sll a4,a1,a2 +; mv t1,a1 +; select_reg 
t2,zero,t0##condition=(a3 eq zero) +; or a1,t3,t2 +; sll a4,t1,a3 ; srl a5,a0,a6 -; select_reg a7,zero,a5##condition=(a2 eq zero) +; select_reg a7,zero,a5##condition=(a3 eq zero) ; or t4,a4,a7 ; li t1,64 -; select_reg a0,t4,a3##condition=(a2 uge t1) -; select_reg a1,a3,t4##condition=(a2 uge t1) +; andi a2,a2,127 +; select_reg a0,t4,a1##condition=(a2 uge t1) +; select_reg a1,a1,t4##condition=(a2 uge t1) ; ret function %f4(i64, i64) -> i64 { diff --git a/cranelift/filetests/filetests/runtests/issue5523.clif b/cranelift/filetests/filetests/runtests/issue5523.clif new file mode 100644 index 0000000000..e0f26f0380 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/issue5523.clif @@ -0,0 +1,15 @@ +test interpret +test run +set enable_llvm_abi_extensions=true +target riscv64 +target aarch64 +target s390x +target x86_64 + +function %a(i16, i128) -> i128 system_v { +block0(v0: i16, v1: i128): + v2 = rotl v1, v0 + return v2 +} + +; run: %a(64, 1095219937288) == 20203241887575960770402119057408 \ No newline at end of file