diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index f4248d5f94..46e8e842d4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -2235,7 +2235,14 @@ (let ((dst WritableReg (temp_writable_reg $I64))) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CSet dst cond) dst))) -;; Helper for constructing `csetm` instructions. +;; Helper for constructing `cset` instructions, when the flags producer will +;; also return a value. +(decl cset_paired (Cond) ConsumesFlags) +(rule (cset_paired cond) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer (MInst.CSet dst cond) dst))) + +;; Helper for constructing `csetm` instructions. (decl csetm (Cond) ConsumesFlags) (rule (csetm cond) (let ((dst WritableReg (temp_writable_reg $I64))) @@ -2280,6 +2287,9 @@ (decl add_extend (Type Reg ExtendedValue) Reg) (rule (add_extend ty x y) (alu_rr_extend_reg (ALUOp.Add) ty x y)) +(decl add_extend_op (Type Reg Reg ExtendOp) Reg) +(rule (add_extend_op ty x y extend) (alu_rrr_extend (ALUOp.Add) ty x y extend)) + (decl add_shift (Type Reg Reg ShiftOpAndAmt) Reg) (rule (add_shift ty x y z) (alu_rrr_shift (ALUOp.Add) ty x y z)) @@ -3442,11 +3452,11 @@ (vec_cmp rn rm in_ty cond))) ;; Determines the appropriate extend op given the value type and whether it is signed. -(decl lower_icmp_extend (Type bool) ExtendOp) -(rule (lower_icmp_extend $I8 $true) (ExtendOp.SXTB)) -(rule (lower_icmp_extend $I16 $true) (ExtendOp.SXTH)) -(rule (lower_icmp_extend $I8 $false) (ExtendOp.UXTB)) -(rule (lower_icmp_extend $I16 $false) (ExtendOp.UXTH)) +(decl lower_extend_op (Type bool) ExtendOp) +(rule (lower_extend_op $I8 $true) (ExtendOp.SXTB)) +(rule (lower_extend_op $I16 $true) (ExtendOp.SXTH)) +(rule (lower_extend_op $I8 $false) (ExtendOp.UXTB)) +(rule (lower_extend_op $I16 $false) (ExtendOp.UXTH)) ;; Integers <= 64-bits. 
(rule -2 (lower_icmp_into_reg cond rn rm in_ty out_ty) @@ -3457,13 +3467,13 @@ (rule 1 (lower_icmp cond rn rm (fits_in_16 ty)) (if (signed_cond_code cond)) (let ((rn Reg (put_in_reg_sext32 rn))) - (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_icmp_extend ty $true)) cond))) + (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty $true)) cond))) (rule -1 (lower_icmp cond rn (imm12_from_value rm) (fits_in_16 ty)) (let ((rn Reg (put_in_reg_zext32 rn))) (flags_and_cc (cmp_imm (operand_size ty) rn rm) cond))) (rule -2 (lower_icmp cond rn rm (fits_in_16 ty)) (let ((rn Reg (put_in_reg_zext32 rn))) - (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_icmp_extend ty $false)) cond))) + (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty $false)) cond))) (rule -3 (lower_icmp cond rn (u64_from_iconst c) ty) (if (ty_int_ref_scalar_64 ty)) (lower_icmp_const cond rn c ty)) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 24fabee6a2..45d7c38651 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -2366,6 +2366,43 @@ (add_with_flags ty a b) (invalid_reg))) +;;; Rules for `iadd_cout` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; For values smaller than a register, we do a normal `add` with both arguments +;; sign extended. The add overflowed iff sign extending the result changes it. +(rule 0 (lower (has_type (fits_in_16 ty) (iadd_cout a b))) + (let ((extend ExtendOp (lower_extend_op ty $true)) + + ;; Instead of emitting two `sxt{b,h}` we do one as an instruction and + ;; the other as an extend operation in the `add` instruction. 
+ ;; + ;; sxtb a_sext, a + ;; add out, a_sext, b, sxtb + ;; cmp out, out, sxtb + ;; cset out_carry, ne + (a_sext Reg (put_in_reg_sext32 a)) + (out Reg (add_extend_op ty a_sext b extend)) + (out_carry Reg (with_flags_reg + (cmp_extend (OperandSize.Size32) out out extend) + (cset (Cond.Ne))))) + (output_pair + (value_reg out) + (value_reg out_carry)))) + + +;; For register-sized adds we just emit an `adds`+`cset`, without further masking. +;; +;; adds out, a, b +;; cset carry, vs +(rule 1 (lower (has_type (ty_32_or_64 ty) (iadd_cout a b))) + (let ((out ValueRegs + (with_flags + (add_with_flags_paired ty a b) + (cset_paired (Cond.Vs))))) + (output_pair + (value_regs_get out 0) + (value_regs_get out 1)))) + ;;; Rules for `uadd_overflow_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (fits_in_64 ty) (uadd_overflow_trap a b tc))) diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 433db8407e..f99f946418 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -257,6 +257,8 @@ pub(crate) fn lower_insn_to_regs( Opcode::UaddOverflowTrap => implemented_in_isle(ctx), + Opcode::IaddCout => implemented_in_isle(ctx), + Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm @@ -266,7 +268,6 @@ pub(crate) fn lower_insn_to_regs( | Opcode::IrsubImm | Opcode::IaddCin | Opcode::IaddIfcin - | Opcode::IaddCout | Opcode::IaddCarry | Opcode::IaddIfcarry | Opcode::IsubBin diff --git a/cranelift/filetests/filetests/runtests/i128-iaddcout.clif b/cranelift/filetests/filetests/runtests/i128-iaddcout.clif new file mode 100644 index 0000000000..ad0c885708 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-iaddcout.clif @@ -0,0 +1,29 @@ +test interpret +; test run +; set enable_llvm_abi_extensions=true +; target aarch64 +; target s390x +; target x86_64 +; target riscv64 + +function %iaddcout_i128_v(i128, i128) -> i128 { +block0(v0: i128, 
v1: i128): + v2, v3 = iadd_cout v0, v1 + return v2 +} +; run: %iaddcout_i128_v(0, 1) == 1 +; run: %iaddcout_i128_v(100, 27) == 127 +; run: %iaddcout_i128_v(100, 28) == 128 +; run: %iaddcout_i128_v(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFF0000, 0xFFFF) == 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF +; run: %iaddcout_i128_v(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFF0000, 0x10000) == 0x80000000_00000000_00000000_00000000 + +function %iaddcout_i128_c(i128, i128) -> i8 { +block0(v0: i128, v1: i128): + v2, v3 = iadd_cout v0, v1 + return v3 +} +; run: %iaddcout_i128_c(0, 1) == 0 +; run: %iaddcout_i128_c(100, 27) == 0 +; run: %iaddcout_i128_c(100, 28) == 0 +; run: %iaddcout_i128_c(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFF0000, 0xFFFF) == 0 +; run: %iaddcout_i128_c(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFF0000, 0x10000) == 1 diff --git a/cranelift/filetests/filetests/runtests/i128-isubbout.clif b/cranelift/filetests/filetests/runtests/i128-isubbout.clif new file mode 100644 index 0000000000..11f99f6431 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/i128-isubbout.clif @@ -0,0 +1,30 @@ +test interpret +; test run +; set enable_llvm_abi_extensions=true +; target aarch64 +; target s390x +; target x86_64 +; target riscv64 + + +function %isubbout_i128_v(i128, i128) -> i128 { +block0(v0: i128, v1: i128): + v2, v3 = isub_bout v0, v1 + return v2 +} +; run: %isubbout_i128_v(0, 1) == -1 +; run: %isubbout_i128_v(100, 20) == 80 +; run: %isubbout_i128_v(100, -28) == 128 +; run: %isubbout_i128_v(-2147483640, 8) == -2147483648 +; run: %isubbout_i128_v(-2147483640, 9) == -2147483649 + +function %isubbout_i128_c(i128, i128) -> i8 { +block0(v0: i128, v1: i128): + v2, v3 = isub_bout v0, v1 + return v3 +} +; run: %isubbout_i128_c(0, 1) == 1 +; run: %isubbout_i128_c(100, 20) == 0 +; run: %isubbout_i128_c(100, -28) == 0 +; run: %isubbout_i128_c(-2147483640, 8) == 1 +; run: %isubbout_i128_c(-2147483640, 9) == 1 diff --git a/cranelift/filetests/filetests/runtests/iaddcout.clif 
b/cranelift/filetests/filetests/runtests/iaddcout.clif index e03ae5a89a..26a91243fd 100644 --- a/cranelift/filetests/filetests/runtests/iaddcout.clif +++ b/cranelift/filetests/filetests/runtests/iaddcout.clif @@ -1,4 +1,9 @@ test interpret +test run +target aarch64 +; target s390x +; target x86_64 +; target riscv64 function %iaddcout_i8_v(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -74,8 +79,8 @@ block0(v0: i64, v1: i64): ; run: %iaddcout_i64_v(0, 1) == 1 ; run: %iaddcout_i64_v(100, 27) == 127 ; run: %iaddcout_i64_v(100, 28) == 128 -; run: %iaddcout_i64_v(2000000000, 147483647) == 2147483647 -; run: %iaddcout_i64_v(2000000000, 147483648) == 2147483648 +; run: %iaddcout_i64_v(0x7FFFFFFF_FFFF0000, 0xFFFF) == 0x7FFFFFFF_FFFFFFFF +; run: %iaddcout_i64_v(0x7FFFFFFF_FFFF0000, 0x10000) == 0x80000000_00000000 function %iaddcout_i64_c(i64, i64) -> i8 { block0(v0: i64, v1: i64): @@ -85,5 +90,5 @@ block0(v0: i64, v1: i64): ; run: %iaddcout_i64_c(0, 1) == 0 ; run: %iaddcout_i64_c(100, 27) == 0 ; run: %iaddcout_i64_c(100, 28) == 0 -; run: %iaddcout_i64_c(2000000000, 147483647) == 0 -; run: %iaddcout_i64_c(2000000000, 147483648) == 0 +; run: %iaddcout_i64_c(0x7FFFFFFF_FFFF0000, 0xFFFF) == 0 +; run: %iaddcout_i64_c(0x7FFFFFFF_FFFF0000, 0x10000) == 1 diff --git a/cranelift/filetests/filetests/runtests/isubbout.clif b/cranelift/filetests/filetests/runtests/isubbout.clif index 9c43770d38..bbbf725560 100644 --- a/cranelift/filetests/filetests/runtests/isubbout.clif +++ b/cranelift/filetests/filetests/runtests/isubbout.clif @@ -1,4 +1,9 @@ test interpret +; test run +; target aarch64 +; target s390x +; target x86_64 +; target riscv64 function %isubbout_i8_v(i8, i8) -> i8 { block0(v0: i8, v1: i8): diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index b1af0ca062..4872b4e8b1 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -273,6 +273,18 @@ const OPCODE_SIGNATURES: &'static 
[( (Opcode::Iadd, &[I32, I32], &[I32], insert_opcode), (Opcode::Iadd, &[I64, I64], &[I64], insert_opcode), (Opcode::Iadd, &[I128, I128], &[I128], insert_opcode), + // IaddCout + // IaddCout not implemented in x64; the I128 variant is also excluded on aarch64 + #[cfg(not(target_arch = "x86_64"))] + (Opcode::IaddCout, &[I8, I8], &[I8, I8], insert_opcode), + #[cfg(not(target_arch = "x86_64"))] + (Opcode::IaddCout, &[I16, I16], &[I16, I8], insert_opcode), + #[cfg(not(target_arch = "x86_64"))] + (Opcode::IaddCout, &[I32, I32], &[I32, I8], insert_opcode), + #[cfg(not(target_arch = "x86_64"))] + (Opcode::IaddCout, &[I64, I64], &[I64, I8], insert_opcode), + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + (Opcode::IaddCout, &[I128, I128], &[I128, I8], insert_opcode), // Isub (Opcode::Isub, &[I8, I8], &[I8], insert_opcode), (Opcode::Isub, &[I16, I16], &[I16], insert_opcode),