Add {u,s}{add,sub,mul}_overflow instructions (#5784)

* add `{u,s}{add,sub,mul}_overflow` with interpreter

* add `{u,s}{add,sub,mul}_overflow` for x64

* add `{u,s}{add,sub,mul}_overflow` for aarch64

* 128bit filetests for `{u,s}{add,sub,mul}_overflow`

* `{u,s}{add,sub,mul}_overflow` emit tests for x64

* `{u,s}{add,sub,mul}_overflow` emit tests for aarch64

* Initial review changes

* add `with_flags_extended` helper

* add `with_flags_chained` helper
T0b1-iOS
2023-04-11 22:16:04 +02:00
committed by GitHub
parent 4c32dd7786
commit 569089e473
27 changed files with 2195 additions and 99 deletions

View File

@@ -2057,6 +2057,125 @@ pub(crate) fn define(
]),
);
{
let of_out = Operand::new("of", i8).with_doc("Overflow flag");
ig.push(
Inst::new(
"uadd_overflow",
r#"
Add integers unsigned with overflow out.
``of`` is set when the addition overflowed.
```text
a &= x + y \pmod 2^B \\
of &= x+y >= 2^B
```
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary,
)
.operands_in(vec![Operand::new("x", iB), Operand::new("y", iB)])
.operands_out(vec![Operand::new("a", iB), of_out.clone()]),
);
ig.push(
Inst::new(
"sadd_overflow",
r#"
Add integers signed with overflow out.
``of`` is set when the addition over- or underflowed.
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary,
)
.operands_in(vec![Operand::new("x", iB), Operand::new("y", iB)])
.operands_out(vec![Operand::new("a", iB), of_out.clone()]),
);
ig.push(
Inst::new(
"usub_overflow",
r#"
Subtract integers unsigned with overflow out.
``of`` is set when the subtraction underflowed.
```text
a &= x - y \pmod 2^B \\
of &= x - y < 0
```
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary,
)
.operands_in(vec![Operand::new("x", iB), Operand::new("y", iB)])
.operands_out(vec![Operand::new("a", iB), of_out.clone()]),
);
ig.push(
Inst::new(
"ssub_overflow",
r#"
Subtract integers signed with overflow out.
``of`` is set when the subtraction over- or underflowed.
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary,
)
.operands_in(vec![Operand::new("x", iB), Operand::new("y", iB)])
.operands_out(vec![Operand::new("a", iB), of_out.clone()]),
);
{
let NarrowScalar = &TypeVar::new(
"NarrowScalar",
"A scalar integer type up to 64 bits",
TypeSetBuilder::new().ints(8..64).build(),
);
ig.push(
Inst::new(
"umul_overflow",
r#"
Multiply integers unsigned with overflow out.
``of`` is set when the multiplication overflowed.
```text
a &= x * y \pmod 2^B \\
of &= x * y >= 2^B
```
Polymorphic over all scalar integer types except i128, but does not support vector
types.
"#,
&formats.binary,
)
.operands_in(vec![
Operand::new("x", NarrowScalar),
Operand::new("y", NarrowScalar),
])
.operands_out(vec![Operand::new("a", NarrowScalar), of_out.clone()]),
);
ig.push(
Inst::new(
"smul_overflow",
r#"
Multiply integers signed with overflow out.
``of`` is set when the multiplication over- or underflowed.
Polymorphic over all scalar integer types except i128, but does not support vector
types.
"#,
&formats.binary,
)
.operands_in(vec![
Operand::new("x", NarrowScalar),
Operand::new("y", NarrowScalar),
])
.operands_out(vec![Operand::new("a", NarrowScalar), of_out.clone()]),
);
}
}
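The docstrings above pin down the value/flag semantics of the new instructions. As a quick illustration only (this is not the interpreter implementation), Rust's overflowing arithmetic produces exactly the documented `(a, of)` pairs:

```rust
/// Illustrative only: mirrors the documented semantics of
/// `{u,s}{add,sub,mul}_overflow` using Rust's overflowing arithmetic.
/// The first element is the wrapped result (a = x op y mod 2^B),
/// the second is the overflow flag `of`.
fn uadd_overflow_u8(x: u8, y: u8) -> (u8, bool) {
    x.overflowing_add(y)
}

fn sadd_overflow_i8(x: i8, y: i8) -> (i8, bool) {
    x.overflowing_add(y)
}

fn umul_overflow_u8(x: u8, y: u8) -> (u8, bool) {
    x.overflowing_mul(y)
}

fn main() {
    // 255 + 1 wraps to 0 and sets the overflow flag.
    assert_eq!(uadd_overflow_u8(255, 1), (0, true));
    // 127 + 1 wraps to -128 and sets the (signed) overflow flag.
    assert_eq!(sadd_overflow_i8(127, 1), (-128, true));
    // 16 * 16 = 256 = 2^8 does not fit in 8 bits, so `of` is set.
    assert_eq!(umul_overflow_u8(16, 16), (0, true));
}
```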
let i32_64 = &TypeVar::new(
"i32_64",
"A 32 or 64-bit scalar integer type",

View File

@@ -258,6 +258,18 @@ impl DataValue {
(DataValue::F32(a), DataValue::F32(b)) => a.bits() == b.bits(),
(DataValue::F64(a), DataValue::F64(b)) => a.bits() == b.bits(),
// when testing for bitwise equality, the sign information does not matter
(DataValue::I8(a), DataValue::U8(b)) => *a as u8 == *b,
(DataValue::U8(a), DataValue::I8(b)) => *a == *b as u8,
(DataValue::I16(a), DataValue::U16(b)) => *a as u16 == *b,
(DataValue::U16(a), DataValue::I16(b)) => *a == *b as u16,
(DataValue::I32(a), DataValue::U32(b)) => *a as u32 == *b,
(DataValue::U32(a), DataValue::I32(b)) => *a == *b as u32,
(DataValue::I64(a), DataValue::U64(b)) => *a as u64 == *b,
(DataValue::U64(a), DataValue::I64(b)) => *a == *b as u64,
(DataValue::I128(a), DataValue::U128(b)) => *a as u128 == *b,
(DataValue::U128(a), DataValue::I128(b)) => *a == *b as u128,
// We don't need to worry about F32x4 / F64x2, since we compare V128, which is
// already the raw bytes anyway
(a, b) => a == b,
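The new match arms above make signed and unsigned `DataValue`s of the same width compare equal whenever their raw bit patterns match. A minimal sketch of that comparison (a hypothetical standalone helper, not the `DataValue` method itself):

```rust
// Hypothetical standalone helper mirroring the new match arms above:
// signed and unsigned values of the same width are bitwise-equal when
// their raw bit patterns match.
fn bitwise_eq_i8_u8(a: i8, b: u8) -> bool {
    a as u8 == b
}

fn main() {
    assert!(bitwise_eq_i8_u8(-1, 255));  // 0xFF == 0xFF
    assert!(!bitwise_eq_i8_u8(-1, 127)); // 0xFF != 0x7F
}
```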

View File

@@ -1004,6 +1004,10 @@
(MAdd)
;; Multiply-sub
(MSub)
;; Unsigned-Multiply-add
(UMAddL)
;; Signed-Multiply-add
(SMAddL)
))
(type MoveWideOp
@@ -1727,6 +1731,9 @@
(decl pure partial lshl_from_u64 (Type u64) ShiftOpAndAmt)
(extern constructor lshl_from_u64 lshl_from_u64)
(decl pure partial ashr_from_u64 (Type u64) ShiftOpAndAmt)
(extern constructor ashr_from_u64 ashr_from_u64)
(decl integral_ty (Type) Type)
(extern extractor integral_ty integral_ty)
@@ -1966,6 +1973,15 @@
(MInst.AluRRRShift (ALUOp.SubS) size (writable_zero_reg)
src1 src2 shift)))
;; Helper for emitting `cmp` instructions, setting flags, with an arithmetic right-shifted
;; second operand register.
(decl cmp_rr_shift_asr (OperandSize Reg Reg u64) ProducesFlags)
(rule (cmp_rr_shift_asr size src1 src2 shift_amount)
(if-let shift (ashr_from_u64 $I64 shift_amount))
(ProducesFlags.ProducesFlagsSideEffect
(MInst.AluRRRShift (ALUOp.SubS) size (writable_zero_reg)
src1 src2 shift)))
;; Helper for emitting `MInst.AluRRRExtend` instructions.
(decl alu_rrr_extend (ALUOp Type Reg Reg ExtendOp) Reg)
(rule (alu_rrr_extend op ty src1 src2 extend)
@@ -1988,6 +2004,22 @@
(_ Unit (emit (MInst.AluRRRR op (operand_size ty) dst src1 src2 src3))))
dst))
;; Helper for emitting paired `MInst.AluRRR` instructions
(decl alu_rrr_with_flags_paired (Type Reg Reg ALUOp) ProducesFlags)
(rule (alu_rrr_with_flags_paired ty src1 src2 alu_op)
(let ((dst WritableReg (temp_writable_reg $I64)))
(ProducesFlags.ProducesFlagsReturnsResultWithConsumer
(MInst.AluRRR alu_op (operand_size ty) dst src1 src2)
dst)))
;; Should only be used for AdcS and SbcS
(decl alu_rrr_with_flags_chained (Type Reg Reg ALUOp) ConsumesAndProducesFlags)
(rule (alu_rrr_with_flags_chained ty src1 src2 alu_op)
(let ((dst WritableReg (temp_writable_reg $I64)))
(ConsumesAndProducesFlags.ReturnsReg
(MInst.AluRRR alu_op (operand_size ty) dst src1 src2)
dst)))
;; Helper for emitting `MInst.BitRR` instructions.
(decl bit_rr (BitOp Type Reg) Reg)
(rule (bit_rr op ty src)
@@ -2335,7 +2367,7 @@
;; immediately by the `MInst.CCmp` instruction.
(decl ccmp (OperandSize Reg Reg NZCV Cond ProducesFlags) ProducesFlags)
(rule (ccmp size rn rm nzcv cond inst_input)
(produces_flags_append inst_input (MInst.CCmp size rn rm nzcv cond)))
(produces_flags_concat inst_input (ProducesFlags.ProducesFlagsSideEffect (MInst.CCmp size rn rm nzcv cond))))
;; Helper for generating `MInst.CCmpImm` instructions.
(decl ccmp_imm (OperandSize Reg UImm5 NZCV Cond) ConsumesFlags)
@@ -2411,6 +2443,14 @@
(decl msub (Type Reg Reg Reg) Reg)
(rule (msub ty x y z) (alu_rrrr (ALUOp3.MSub) ty x y z))
;; Helpers for generating `umaddl` instructions
(decl umaddl (Reg Reg Reg) Reg)
(rule (umaddl x y z) (alu_rrrr (ALUOp3.UMAddL) $I32 x y z))
;; Helpers for generating `smaddl` instructions
(decl smaddl (Reg Reg Reg) Reg)
(rule (smaddl x y z) (alu_rrrr (ALUOp3.SMAddL) $I32 x y z))
;; Helper for generating `uqadd` instructions.
(decl uqadd (Reg Reg VectorSize) Reg)
(rule (uqadd x y size) (vec_rrr (VecALUOp.Uqadd) x y size))
@@ -2620,6 +2660,9 @@
(decl orr_imm (Type Reg ImmLogic) Reg)
(rule (orr_imm ty x y) (alu_rr_imm_logic (ALUOp.Orr) ty x y))
(decl orr_shift (Type Reg Reg ShiftOpAndAmt) Reg)
(rule (orr_shift ty x y shift) (alu_rrr_shift (ALUOp.Orr) ty x y shift))
(decl orr_vec (Reg Reg VectorSize) Reg)
(rule (orr_vec x y size) (vec_rrr (VecALUOp.Orr) x y size))
@@ -3659,12 +3702,12 @@
(rm Reg (put_in_reg y)))
(vec_cmp rn rm in_ty cond)))
;; Determines the appropriate extend op given the value type and whether it is signed.
(decl lower_extend_op (Type bool) ExtendOp)
(rule (lower_extend_op $I8 $true) (ExtendOp.SXTB))
(rule (lower_extend_op $I16 $true) (ExtendOp.SXTH))
(rule (lower_extend_op $I8 $false) (ExtendOp.UXTB))
(rule (lower_extend_op $I16 $false) (ExtendOp.UXTH))
;; Determines the appropriate extend op given the value type and the given ArgumentExtension.
(decl lower_extend_op (Type ArgumentExtension) ExtendOp)
(rule (lower_extend_op $I8 (ArgumentExtension.Sext)) (ExtendOp.SXTB))
(rule (lower_extend_op $I16 (ArgumentExtension.Sext)) (ExtendOp.SXTH))
(rule (lower_extend_op $I8 (ArgumentExtension.Uext)) (ExtendOp.UXTB))
(rule (lower_extend_op $I16 (ArgumentExtension.Uext)) (ExtendOp.UXTH))
;; Integers <= 64-bits.
(rule -2 (lower_icmp_into_reg cond rn rm in_ty out_ty)
@@ -3675,13 +3718,13 @@
(rule 1 (lower_icmp cond rn rm (fits_in_16 ty))
(if (signed_cond_code cond))
(let ((rn Reg (put_in_reg_sext32 rn)))
(flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty $true)) cond)))
(flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty (ArgumentExtension.Sext))) cond)))
(rule -1 (lower_icmp cond rn (imm12_from_value rm) (fits_in_16 ty))
(let ((rn Reg (put_in_reg_zext32 rn)))
(flags_and_cc (cmp_imm (operand_size ty) rn rm) cond)))
(rule -2 (lower_icmp cond rn rm (fits_in_16 ty))
(let ((rn Reg (put_in_reg_zext32 rn)))
(flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty $false)) cond)))
(flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty (ArgumentExtension.Uext))) cond)))
(rule -3 (lower_icmp cond rn (u64_from_iconst c) ty)
(if (ty_int_ref_scalar_64 ty))
(lower_icmp_const cond rn c ty))

View File

@@ -789,6 +789,14 @@ impl MachInstEmit for Inst {
let (top11, bit15) = match alu_op {
ALUOp3::MAdd => (0b0_00_11011_000, 0),
ALUOp3::MSub => (0b0_00_11011_000, 1),
ALUOp3::UMAddL => {
debug_assert!(size == OperandSize::Size32);
(0b1_00_11011_1_01, 0)
}
ALUOp3::SMAddL => {
debug_assert!(size == OperandSize::Size32);
(0b1_00_11011_0_01, 0)
}
};
let top11 = top11 | size.sf_bit() << 10;
sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
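As a sanity check on the new `top11` values, the sketch below re-derives the `umaddl`/`smaddl` words from the standard AArch64 three-source data-processing layout (Rm at bits 16–20, `o0` at bit 15, Ra at bits 10–14, Rn at bits 5–9, Rd at bits 0–4) and reproduces the little-endian byte strings used in the emit tests further down. It is an illustration of the bit layout, not the in-tree `enc_arith_rrrr` helper.

```rust
// Illustrative re-derivation of the umaddl/smaddl encodings checked by the
// emit tests ("4110A39B" / "4110239B"): top 11 bits, then Rm, o0, Ra, Rn, Rd.
fn enc_maddl(top11: u32, rm: u32, o0: u32, ra: u32, rn: u32, rd: u32) -> u32 {
    (top11 << 21) | (rm << 16) | (o0 << 15) | (ra << 10) | (rn << 5) | rd
}

fn main() {
    // umaddl x1, w2, w3, x4 -> top11 = 0b1_00_11011_1_01
    let umaddl = enc_maddl(0b1_00_11011_1_01, 3, 0, 4, 2, 1);
    assert_eq!(umaddl.to_le_bytes(), [0x41, 0x10, 0xA3, 0x9B]);

    // smaddl x1, w2, w3, x4 -> top11 = 0b1_00_11011_0_01
    let smaddl = enc_maddl(0b1_00_11011_0_01, 3, 0, 4, 2, 1);
    assert_eq!(smaddl.to_le_bytes(), [0x41, 0x10, 0x23, 0x9B]);
}
```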

View File

@@ -1086,6 +1086,30 @@ fn test_aarch64_binemit() {
"4190039B",
"msub x1, x2, x3, x4",
));
insns.push((
Inst::AluRRRR {
alu_op: ALUOp3::UMAddL,
size: OperandSize::Size32,
rd: writable_xreg(1),
rn: xreg(2),
rm: xreg(3),
ra: xreg(4),
},
"4110A39B",
"umaddl x1, w2, w3, x4",
));
insns.push((
Inst::AluRRRR {
alu_op: ALUOp3::SMAddL,
size: OperandSize::Size32,
rd: writable_xreg(1),
rn: xreg(2),
rm: xreg(3),
ra: xreg(4),
},
"4110239B",
"smaddl x1, w2, w3, x4",
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::SMulH,

View File

@@ -1191,14 +1191,16 @@ impl Inst {
rm,
ra,
} => {
let op = match alu_op {
ALUOp3::MAdd => "madd",
ALUOp3::MSub => "msub",
let (op, da_size) = match alu_op {
ALUOp3::MAdd => ("madd", size),
ALUOp3::MSub => ("msub", size),
ALUOp3::UMAddL => ("umaddl", OperandSize::Size64),
ALUOp3::SMAddL => ("smaddl", OperandSize::Size64),
};
let rd = pretty_print_ireg(rd.to_reg(), size, allocs);
let rd = pretty_print_ireg(rd.to_reg(), da_size, allocs);
let rn = pretty_print_ireg(rn, size, allocs);
let rm = pretty_print_ireg(rm, size, allocs);
let ra = pretty_print_ireg(ra, size, allocs);
let ra = pretty_print_ireg(ra, da_size, allocs);
format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
}

View File

@@ -2580,7 +2580,7 @@
;; For values smaller than a register, we do a normal `add` with both arguments
;; sign extended. We then check if the output sign bit has flipped.
(rule 0 (lower (has_type (fits_in_16 ty) (iadd_cout a b)))
(let ((extend ExtendOp (lower_extend_op ty $true))
(let ((extend ExtendOp (lower_extend_op ty (ArgumentExtension.Sext)))
;; Instead of emitting two `sxt{b,h}` we do one as an instruction and
;; the other as an extend operation in the `add` instruction.
@@ -2617,6 +2617,250 @@
(rule (lower (has_type (fits_in_64 ty) (uadd_overflow_trap a b tc)))
(trap_if_overflow (add_with_flags_paired ty a b) tc))
;;;; Helpers for `*_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Put a narrow value into a register and sign- or zero-extend it, depending on the ArgumentExtension.
(decl put_in_reg_ext32 (Value ArgumentExtension) Reg)
(rule (put_in_reg_ext32 val (ArgumentExtension.Sext))
(put_in_reg_sext32 val))
(rule (put_in_reg_ext32 val (ArgumentExtension.Uext))
(put_in_reg_zext32 val))
;; For narrow values emit a normal op with both arguments zero/sign extended.
;; Then check if the output is the same as itself zero/sign extended from the narrower width.
(decl overflow_op_small (Type Value Value ArgumentExtension ALUOp) InstOutput)
(rule (overflow_op_small ty a b arg_ext alu_op)
(let ((extend ExtendOp (lower_extend_op ty arg_ext))
;; Instead of emitting two `{u,s}xt{b,h}` we do one as an instruction and
;; the other as an extend operation in the alu_op.
;;
;; uxtb a_ext, a
;; alu_op out, a_ext, b, {u,s}xtb
;; cmp out, out, {u,s}xtb
;; cset out_of, ne
(a_ext Reg (put_in_reg_ext32 a arg_ext))
(out Reg (alu_rrr_extend alu_op ty a_ext b extend))
(out_of Reg (with_flags_reg
(cmp_extend (OperandSize.Size32) out out extend)
(cset (Cond.Ne)))))
(output_pair
(value_reg out)
(value_reg out_of))))
;; For register-sized ops, just emit an op+cset, without further masking.
;;
;; op out, a, b
;; cset out_of, cond
;;
;; Expected conds:
;; Hs: carry set, i.e. unsigned add overflow; Vs: signed over-/underflow;
;; Lo: carry clear. On aarch64, subtraction is implemented as an add of the
;; two's complement, so a clear carry (the add not overflowing) means the
;; subtraction underflowed.
(decl overflow_op_normal (Type Value Value ALUOp Cond) InstOutput)
(rule (overflow_op_normal ty a b alu_op cond)
(let ((out ValueRegs
(with_flags
(alu_rrr_with_flags_paired ty a b alu_op)
(cset_paired cond))))
(output_pair
(value_regs_get out 0)
(value_regs_get out 1))))
;; For 128-bit integers emit, for example, adds+adcs+cset
(decl overflow_op_128 (Value Value ALUOp ALUOp Cond) InstOutput)
(rule (overflow_op_128 x y alu_op1 alu_op2 cond)
(let
;; Get the high/low registers for `x`.
((x_regs ValueRegs x)
(x_lo Reg (value_regs_get x_regs 0))
(x_hi Reg (value_regs_get x_regs 1))
;; Get the high/low registers for `y`.
(y_regs ValueRegs y)
(y_lo Reg (value_regs_get y_regs 0))
(y_hi Reg (value_regs_get y_regs 1)))
;; We cannot use the with_flags helper here, but this is fine for now.
(let
((lo_inst ProducesFlags (alu_rrr_with_flags_paired $I64 x_lo y_lo alu_op1))
(hi_inst ConsumesAndProducesFlags (alu_rrr_with_flags_chained $I64 x_hi y_hi alu_op2))
(of_inst ConsumesFlags (cset_paired cond))
(result MultiReg (with_flags_chained lo_inst hi_inst of_inst)))
(multi_reg_to_pair_and_single result)))
)
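The `overflow_op_small` strategy above can be read as: widen both inputs to 32 bits, perform the operation there, and declare overflow when the 32-bit result no longer equals its own re-extension from the narrow width (the `cmp out, out, {u,s}xt{b,h}` + `cset ne` step). A hedged Rust sketch of the same idea for an unsigned 8-bit add (illustrative, not the lowering itself):

```rust
// Illustrative model of `overflow_op_small` for an unsigned 8-bit add:
// do the add at 32 bits, then compare the result against itself
// zero-extended from 8 bits.
fn uadd8_overflow_via_extend(a: u8, b: u8) -> (u8, bool) {
    let wide = a as u32 + b as u32;       // add with both inputs zero-extended
    let of = wide != (wide as u8) as u32; // result changed by re-extending => overflow
    (wide as u8, of)
}

fn main() {
    assert_eq!(uadd8_overflow_via_extend(200, 100), (44, true));
    assert_eq!(uadd8_overflow_via_extend(1, 2), (3, false));
    // Matches the generic overflowing semantics.
    assert_eq!(uadd8_overflow_via_extend(255, 255), 255u8.overflowing_add(255));
}
```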
;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; For values smaller than a register, we do a normal `add` with both arguments
;; zero extended. We then check if the output is the same as itself zero extended.
(rule 1 (lower (has_type (fits_in_16 ty) (uadd_overflow a b)))
(overflow_op_small ty a b (ArgumentExtension.Uext) (ALUOp.Add)))
;; For register-sized adds we just emit an adds+cset, without further masking.
(rule 2 (lower (has_type (ty_32_or_64 ty) (uadd_overflow a b)))
(overflow_op_normal ty a b (ALUOp.AddS) (Cond.Hs)))
;; For 128-bit integers we emit adds+adcs+cset
(rule 0 (lower (has_type $I128 (uadd_overflow x y)))
(overflow_op_128 x y (ALUOp.AddS) (ALUOp.AdcS) (Cond.Hs)))
;;;; Rules for `sadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sxt{b,h} a_ext, a
;; add out, a_ext, b, sxt{b,h}
;; cmp out, out, sxt{b,h}
;; cset of, ne
(rule 1 (lower (has_type (fits_in_16 ty) (sadd_overflow a b)))
(overflow_op_small ty a b (ArgumentExtension.Sext) (ALUOp.Add)))
;; adds a, b
;; cset of, vs
(rule 2 (lower (has_type (ty_32_or_64 ty) (sadd_overflow a b)))
(overflow_op_normal ty a b (ALUOp.AddS) (Cond.Vs)))
;; adds x_lo, y_lo
;; adcs x_hi, y_hi
;; cset of, vs
(rule 0 (lower (has_type $I128 (sadd_overflow x y)))
(overflow_op_128 x y (ALUOp.AddS) (ALUOp.AdcS) (Cond.Vs)))
;;;; Rules for `usub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; uxt{b,h} a_ext, a
;; sub out, a_ext, b, uxt{b,h}
;; cmp out, out, uxt{b,h}
;; cset of, ne
(rule 1 (lower (has_type (fits_in_16 ty) (usub_overflow a b)))
(overflow_op_small ty a b (ArgumentExtension.Uext) (ALUOp.Sub)))
;; subs a, b
;; cset of, lo
(rule 2 (lower (has_type (ty_32_or_64 ty) (usub_overflow a b)))
(overflow_op_normal ty a b (ALUOp.SubS) (Cond.Lo)))
;; subs x_lo, y_lo
;; sbcs x_hi, y_hi
;; cset of, lo
(rule 0 (lower (has_type $I128 (usub_overflow x y)))
(overflow_op_128 x y (ALUOp.SubS) (ALUOp.SbcS) (Cond.Lo)))
;;;; Rules for `ssub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sxt{b,h} a_ext, a
;; sub out, a_ext, b, sxt{b,h}
;; cmp out, out, sxt{b,h}
;; cset of, ne
(rule 1 (lower (has_type (fits_in_16 ty) (ssub_overflow a b)))
(overflow_op_small ty a b (ArgumentExtension.Sext) (ALUOp.Sub)))
;; subs a, b
;; cset of, vs
(rule 2 (lower (has_type (ty_32_or_64 ty) (ssub_overflow a b)))
(overflow_op_normal ty a b (ALUOp.SubS) (Cond.Vs)))
;; subs x_lo, y_lo
;; sbcs x_hi, y_hi
;; cset of, vs
(rule 0 (lower (has_type $I128 (ssub_overflow x y)))
(overflow_op_128 x y (ALUOp.SubS) (ALUOp.SbcS) (Cond.Vs)))
;;;; Rules for `umul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; uxt{b,h} a_ext, a
;; uxt{b,h} b_ext, b
;; mul out, a_ext, b_ext
;; cmp out, out, uxt{b,h}
;; cset of, ne
(rule 1 (lower (has_type (fits_in_16 ty) (umul_overflow a b)))
(let ((extend ExtendOp (lower_extend_op ty (ArgumentExtension.Uext)))
(a_uext Reg (put_in_reg_zext32 a))
(b_uext Reg (put_in_reg_zext32 b))
(out Reg (madd ty a_uext b_uext (zero_reg)))
(out_of Reg (with_flags_reg
(cmp_extend (OperandSize.Size32) out out extend)
(cset (Cond.Ne)))))
(output_pair
(value_reg out)
(value_reg out_of))))
;; umull out, a, b
;; cmp out, out, uxtw
;; cset of, ne
(rule 2 (lower (has_type $I32 (umul_overflow a b)))
(let (
(out Reg (umaddl a b (zero_reg)))
(out_of Reg (with_flags_reg
(cmp_extend (OperandSize.Size64) out out (ExtendOp.UXTW))
(cset (Cond.Ne)))))
(output_pair
(value_reg out)
(value_reg out_of))))
;; mul out, a, b
;; umulh tmp, a, b
;; cmp tmp, #0
;; cset of, ne
(rule 2 (lower (has_type $I64 (umul_overflow a b)))
(let (
(out Reg (madd $I64 a b (zero_reg)))
(tmp Reg (umulh $I64 a b))
(out_of Reg (with_flags_reg
(cmp64_imm tmp (u8_into_imm12 0))
(cset (Cond.Ne)))))
(output_pair
(value_reg out)
(value_reg out_of))))
;;;; Rules for `smul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sxt{b,h} a_ext, a
;; sxt{b,h} b_ext, b
;; mul out, a_ext, b_ext
;; cmp out, out, sxt{b,h}
;; cset of, ne
(rule 1 (lower (has_type (fits_in_16 ty) (smul_overflow a b)))
(let ((extend ExtendOp (lower_extend_op ty (ArgumentExtension.Sext)))
(a_sext Reg (put_in_reg_sext32 a))
(b_sext Reg (put_in_reg_sext32 b))
(out Reg (madd ty a_sext b_sext (zero_reg)))
(out_of Reg (with_flags_reg
(cmp_extend (OperandSize.Size32) out out extend)
(cset (Cond.Ne)))))
(output_pair
(value_reg out)
(value_reg out_of))))
;; smull out, a, b
;; cmp out, out, sxtw
;; cset of, ne
(rule 2 (lower (has_type $I32 (smul_overflow a b)))
(let (
(out Reg (smaddl a b (zero_reg)))
(out_of Reg (with_flags_reg
(cmp_extend (OperandSize.Size64) out out (ExtendOp.SXTW))
(cset (Cond.Ne)))))
(output_pair
(value_reg out)
(value_reg out_of))))
;; mul out, a, b
;; smulh tmp, a, b
;; cmp tmp, out, ASR #63
;; cset of, ne
(rule 2 (lower (has_type $I64 (smul_overflow a b)))
(let (
(out Reg (madd $I64 a b (zero_reg)))
(tmp Reg (smulh $I64 a b))
(out_of Reg (with_flags_reg
(cmp_rr_shift_asr (OperandSize.Size64) tmp out 63)
(cset (Cond.Ne)))))
(output_pair
(value_reg out)
(value_reg out_of))))
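The 64-bit multiply rules above detect overflow through the high half of the widening product: for `umul_overflow` the `umulh` result must be zero, and for `smul_overflow` the `smulh` result must equal the low product shifted right arithmetically by 63 (i.e. the high half is just the sign-extension of the low half). A hedged Rust sketch of those conditions, using 128-bit widening as a stand-in for `umulh`/`smulh`:

```rust
// Illustrative check behind the aarch64 64-bit rules:
// unsigned: overflow iff the high half (umulh) is non-zero;
// signed:   overflow iff the high half (smulh) differs from lo >> 63.
fn umul64_overflow(a: u64, b: u64) -> (u64, bool) {
    let wide = (a as u128) * (b as u128);
    let (lo, hi) = (wide as u64, (wide >> 64) as u64);
    (lo, hi != 0)
}

fn smul64_overflow(a: i64, b: i64) -> (i64, bool) {
    let wide = (a as i128) * (b as i128);
    let (lo, hi) = (wide as i64, (wide >> 64) as i64);
    (lo, hi != lo >> 63) // the `cmp tmp, out, ASR #63` + `cset ne` step
}

fn main() {
    assert_eq!(umul64_overflow(u64::MAX, 2), u64::MAX.overflowing_mul(2));
    assert_eq!(smul64_overflow(i64::MIN, -1), i64::MIN.overflowing_mul(-1));
    assert_eq!(smul64_overflow(-3, 4), (-12, false));
}
```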
;;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (tls_model (TlsModel.ElfGd)) (tls_value (symbol_value_data name _ _))))

View File

@@ -155,6 +155,17 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
}
}
fn ashr_from_u64(&mut self, ty: Type, n: u64) -> Option<ShiftOpAndAmt> {
let shiftimm = ShiftOpShiftImm::maybe_from_shift(n)?;
let shiftee_bits = ty_bits(ty);
if shiftee_bits <= std::u8::MAX as usize {
let shiftimm = shiftimm.mask(shiftee_bits as u8);
Some(ShiftOpAndAmt::new(ShiftOp::ASR, shiftimm))
} else {
None
}
}
fn integral_ty(&mut self, ty: Type) -> Option<Type> {
match ty {
I8 | I16 | I32 | I64 | R64 => Some(ty),

View File

@@ -2303,9 +2303,6 @@
(test Reg (rv_srli sum (imm12_const (ty_bits ty)))))
(value_regs sum test)))
(decl inst_output_get (InstOutput u8) ValueRegs)
(extern constructor inst_output_get inst_output_get)
(decl label_to_br_target (MachLabel) BranchTarget)
(extern constructor label_to_br_target label_to_br_target)

View File

@@ -303,10 +303,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
self.backend.isa_flags.has_zbs()
}
fn inst_output_get(&mut self, x: InstOutput, index: u8) -> ValueRegs {
x[index as usize]
}
fn move_f_to_x(&mut self, r: Reg, ty: Type) -> Reg {
let result = self.temp_writable_reg(I64);
self.emit(&gen_move(result, I64, r, ty));

View File

@@ -14,14 +14,14 @@
;; Integer instructions.
;; Integer arithmetic/bit-twiddling.
(AluRmiR (size OperandSize) ;; 4 or 8
(AluRmiR (size OperandSize) ;; 1, 2, 4 or 8
(op AluRmiROpcode)
(src1 Gpr)
(src2 GprMemImm)
(dst WritableGpr))
;; Integer arithmetic read-modify-write on memory.
(AluRM (size OperandSize) ;; 4 or 8
(AluRM (size OperandSize) ;; 1, 2, 4 or 8
(op AluRmiROpcode)
(src1_dst SyntheticAmode)
(src2 Gpr))
@@ -86,6 +86,12 @@
(dst_lo WritableGpr)
(dst_hi WritableGpr))
;; x64 'mul' instruction, but it only outputs the low half of the result
(UMulLo (size OperandSize)
(src1 Gpr)
(src2 GprMem)
(dst WritableGpr))
;; A synthetic instruction sequence used as part of the lowering of the
;; `srem` instruction which returns 0 if the divisor is -1 and
;; otherwise executes an `idiv` instruction.
@@ -2118,6 +2124,29 @@
dst)
dst)))
(decl x64_alurmi_with_flags_paired (AluRmiROpcode Type Gpr GprMemImm) ProducesFlags)
(rule (x64_alurmi_with_flags_paired opc (fits_in_64 ty) src1 src2)
(let ((dst WritableGpr (temp_writable_gpr)))
(ProducesFlags.ProducesFlagsReturnsResultWithConsumer
(MInst.AluRmiR (raw_operand_size_of_type ty)
opc
src1
src2
dst)
dst)))
;; Should only be used for Adc and Sbb
(decl x64_alurmi_with_flags_chained (AluRmiROpcode Type Gpr GprMemImm) ConsumesAndProducesFlags)
(rule (x64_alurmi_with_flags_chained opc (fits_in_64 ty) src1 src2)
(let ((dst WritableGpr (temp_writable_gpr)))
(ConsumesAndProducesFlags.ReturnsReg
(MInst.AluRmiR (raw_operand_size_of_type ty)
opc
src1
src2
dst)
dst)))
;; Helper for creating `adc` instructions.
(decl x64_adc_paired (Type Gpr GprMemImm) ConsumesFlags)
(rule (x64_adc_paired ty src1 src2)
@@ -2170,6 +2199,24 @@
src1
src2))
;; Helper for creating `umullo` instructions.
(decl x64_umullo (Type Gpr GprMem) Gpr)
(rule (x64_umullo ty src1 src2)
(let ((dst WritableGpr (temp_writable_gpr))
(size OperandSize (raw_operand_size_of_type ty))
(_ Unit (emit (MInst.UMulLo size src1 src2 dst))))
dst))
(decl x64_umullo_with_flags_paired (Type Gpr GprMem) ProducesFlags)
(rule (x64_umullo_with_flags_paired ty src1 src2)
(let ((dst WritableGpr (temp_writable_gpr)))
(ProducesFlags.ProducesFlagsReturnsResultWithConsumer
(MInst.UMulLo (raw_operand_size_of_type ty)
src1
src2
dst)
dst)))
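`x64_umullo_with_flags_paired` is later consumed with `CC.O` (see the `umul_overflow` rule further down): the one-operand x86 `mul` sets CF and OF exactly when the high half of the widening product is non-zero, which is the same condition as the low half having overflowed. A small illustrative check of that equivalence:

```rust
// Illustrative: the flag condition used by `umul_overflow` on x64.
// One-operand `mul` sets CF/OF exactly when the high half of the widening
// product is non-zero, which coincides with the low half overflowing.
fn high_half_nonzero(a: u64, b: u64) -> bool {
    ((a as u128 * b as u128) >> 64) != 0
}

fn main() {
    for &(a, b) in &[(0u64, 5u64), (u64::MAX, 1), (u64::MAX, 2), (1 << 32, 1 << 32)] {
        assert_eq!(high_half_nonzero(a, b), a.checked_mul(b).is_none());
    }
}
```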
;; Helper for emitting `and` instructions.
(decl x64_and (Type Gpr GprMemImm) Gpr)
(rule (x64_and ty src1 src2)

View File

@@ -154,35 +154,69 @@ pub(crate) fn emit(
debug_assert_eq!(src1, reg_g);
let src2 = src2.clone().to_reg_mem_imm().with_allocs(allocs);
let rex = RexFlags::from(*size);
let prefix = if *size == OperandSize::Size16 {
LegacyPrefixes::_66
} else {
LegacyPrefixes::None
};
let mut rex = RexFlags::from(*size);
if *op == AluRmiROpcode::Mul {
// We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so
// we have to special-case it.
match src2 {
RegMemImm::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, LegacyPrefixes::None, 0x0FAF, 2, reg_g, reg_e, rex);
}
if *size == OperandSize::Size8 {
match src2 {
RegMemImm::Reg { reg: reg_e } => {
debug_assert!(reg_e.is_real());
rex.always_emit_if_8bit_needed(reg_e);
let enc_e = int_reg_enc(reg_e);
emit_std_enc_enc(sink, LegacyPrefixes::None, 0xF6, 1, 5, enc_e, rex);
}
RegMemImm::Mem { addr } => {
let amode = addr.finalize(state, sink);
emit_std_reg_mem(
sink,
LegacyPrefixes::None,
0x0FAF,
2,
reg_g,
&amode,
rex,
0,
);
}
RegMemImm::Mem { addr } => {
let amode = addr.finalize(state, sink);
emit_std_enc_mem(
sink,
LegacyPrefixes::None,
0xF6,
1,
5,
&amode,
rex,
0,
);
}
RegMemImm::Imm { simm32 } => {
let use_imm8 = low8_will_sign_extend_to_32(simm32);
let opcode = if use_imm8 { 0x6B } else { 0x69 };
// Yes, really, reg_g twice.
emit_std_reg_reg(sink, LegacyPrefixes::None, opcode, 1, reg_g, reg_g, rex);
emit_simm(sink, if use_imm8 { 1 } else { 4 }, simm32);
RegMemImm::Imm { .. } => {
panic!("Cannot emit 8bit imul with 8bit immediate");
}
}
} else {
match src2 {
RegMemImm::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, 0x0FAF, 2, reg_g, reg_e, rex);
}
RegMemImm::Mem { addr } => {
let amode = addr.finalize(state, sink);
emit_std_reg_mem(sink, prefix, 0x0FAF, 2, reg_g, &amode, rex, 0);
}
RegMemImm::Imm { simm32 } => {
let imm_size = if low8_will_sign_extend_to_32(simm32) {
1
} else {
if *size == OperandSize::Size16 {
2
} else {
4
}
};
let opcode = if imm_size == 1 { 0x6B } else { 0x69 };
// Yes, really, reg_g twice.
emit_std_reg_reg(sink, prefix, opcode, 1, reg_g, reg_g, rex);
emit_simm(sink, imm_size, simm32);
}
}
}
} else {
@@ -197,52 +231,63 @@ pub(crate) fn emit(
AluRmiROpcode::Mul => panic!("unreachable"),
};
let (opcode_r, opcode_m) = if *size == OperandSize::Size8 {
(opcode_r - 1, opcode_m - 1)
} else {
(opcode_r, opcode_m)
};
if *size == OperandSize::Size8 {
debug_assert!(reg_g.is_real());
rex.always_emit_if_8bit_needed(reg_g);
}
match src2 {
RegMemImm::Reg { reg: reg_e } => {
if *size == OperandSize::Size8 {
debug_assert!(reg_e.is_real());
rex.always_emit_if_8bit_needed(reg_e);
}
// GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R
// duality). Do this too, so as to be able to compare generated machine
// code easily.
emit_std_reg_reg(
sink,
LegacyPrefixes::None,
opcode_r,
1,
reg_e,
reg_g,
rex,
);
emit_std_reg_reg(sink, prefix, opcode_r, 1, reg_e, reg_g, rex);
}
RegMemImm::Mem { addr } => {
let amode = addr.finalize(state, sink);
// Here we revert to the "normal" G-E ordering.
emit_std_reg_mem(
sink,
LegacyPrefixes::None,
opcode_m,
1,
reg_g,
&amode,
rex,
0,
);
emit_std_reg_mem(sink, prefix, opcode_m, 1, reg_g, &amode, rex, 0);
}
RegMemImm::Imm { simm32 } => {
let use_imm8 = low8_will_sign_extend_to_32(simm32);
let opcode = if use_imm8 { 0x83 } else { 0x81 };
let imm_size = if *size == OperandSize::Size8 {
1
} else {
if low8_will_sign_extend_to_32(simm32) {
1
} else {
if *size == OperandSize::Size16 {
2
} else {
4
}
}
};
let opcode = if *size == OperandSize::Size8 {
0x80
} else if low8_will_sign_extend_to_32(simm32) {
0x83
} else {
0x81
};
// And also here we use the "normal" G-E ordering.
let enc_g = int_reg_enc(reg_g);
emit_std_enc_enc(
sink,
LegacyPrefixes::None,
opcode,
1,
subopcode_i,
enc_g,
rex,
);
emit_simm(sink, if use_imm8 { 1 } else { 4 }, simm32);
emit_std_enc_enc(sink, prefix, opcode, 1, subopcode_i, enc_g, rex);
emit_simm(sink, imm_size, simm32);
}
}
}
@@ -274,7 +319,6 @@ pub(crate) fn emit(
let src2 = allocs.next(src2.to_reg());
let src1_dst = src1_dst.finalize(state, sink).with_allocs(allocs);
assert!(*size == OperandSize::Size32 || *size == OperandSize::Size64);
let opcode = match op {
AluRmiROpcode::Add => 0x01,
AluRmiROpcode::Sub => 0x29,
@@ -283,17 +327,26 @@ pub(crate) fn emit(
AluRmiROpcode::Xor => 0x31,
_ => panic!("Unsupported read-modify-write ALU opcode"),
};
let prefix = if *size == OperandSize::Size16 {
LegacyPrefixes::_66
} else {
LegacyPrefixes::None
};
let opcode = if *size == OperandSize::Size8 {
opcode - 1
} else {
opcode
};
let mut rex = RexFlags::from(*size);
if *size == OperandSize::Size8 {
debug_assert!(src2.is_real());
rex.always_emit_if_8bit_needed(src2);
}
let enc_g = int_reg_enc(src2);
emit_std_enc_mem(
sink,
LegacyPrefixes::None,
opcode,
1,
enc_g,
&src1_dst,
RexFlags::from(*size),
0,
);
emit_std_enc_mem(sink, prefix, opcode, 1, enc_g, &src1_dst, rex, 0);
}
Inst::AluRmRVex {
@@ -521,6 +574,45 @@ pub(crate) fn emit(
}
}
Inst::UMulLo {
size,
src1,
src2,
dst,
} => {
let src1 = allocs.next(src1.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());
debug_assert_eq!(src1, regs::rax());
debug_assert_eq!(dst, regs::rax());
let mut rex = RexFlags::from(*size);
let prefix = match size {
OperandSize::Size16 => LegacyPrefixes::_66,
_ => LegacyPrefixes::None,
};
let opcode = if *size == OperandSize::Size8 {
0xF6
} else {
0xF7
};
match src2.clone().to_reg_mem() {
RegMem::Reg { reg } => {
let reg = allocs.next(reg);
if *size == OperandSize::Size8 {
rex.always_emit_if_8bit_needed(reg);
}
let reg_e = int_reg_enc(reg);
emit_std_enc_enc(sink, prefix, opcode, 1, 4, reg_e, rex);
}
RegMem::Mem { addr: src } => {
let amode = src.finalize(state, sink).with_allocs(allocs);
emit_std_enc_mem(sink, prefix, opcode, 1, 4, &amode, rex, 0);
}
}
}
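The `UMulLo` emit path above selects opcode `0xF6` for the 8-bit form and `0xF7` otherwise, prefixes the 16-bit form with `0x66`, and encodes the operand in ModRM using subopcode `/4`. The sketch below re-derives a couple of the expected byte strings from the emit tests further down; it is illustrative only, not the real `emit_std_enc_enc` helper, and it ignores REX handling for extended registers.

```rust
// Illustrative ModRM derivation for the one-operand mul/imul group:
// opcode 0xF6 (8-bit) / 0xF7 (wider), subopcode /4 = mul, /5 = imul (signed),
// register operand encoded as ModRM = 0b11 | sub << 3 | rm.
fn modrm(subopcode: u8, rm: u8) -> u8 {
    0b1100_0000 | (subopcode << 3) | rm
}

fn main() {
    // `mulb %dl` from the UMulLo tests: F6 /4, rm = dl (2) -> "F6E2".
    assert_eq!([0xF6, modrm(4, 2)], [0xF6, 0xE2]);
    // `mulq %rdx`: REX.W (0x48) + F7 /4 -> "48F7E2".
    assert_eq!([0x48, 0xF7, modrm(4, 2)], [0x48, 0xF7, 0xE2]);
    // `imulb %cl` from the 8-bit AluRmiR::Mul tests: F6 /5, rm = cl (1) -> "F6E9".
    assert_eq!([0xF6, modrm(5, 1)], [0xF6, 0xE9]);
}
```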
Inst::SignExtendData { size, src, dst } => {
let src = allocs.next(src.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());

View File

@@ -78,6 +78,15 @@ impl Inst {
}
}
fn umul_lo(size: OperandSize, operand: RegMem) -> Inst {
Inst::UMulLo {
size,
src1: Gpr::new(regs::rax()).unwrap(),
src2: GprMem::new(operand).unwrap(),
dst: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()),
}
}
fn xmm_rm_r_evex(op: Avx512Opcode, src1: RegMem, src2: Reg, dst: Writable<Reg>) -> Self {
src1.assert_regclass_is(RegClass::Float);
debug_assert!(src2.class() == RegClass::Float);
@@ -1535,6 +1544,415 @@ fn test_x64_emit() {
"imull %esi, $76543210, %esi",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Add,
RegMemImm::reg(rax),
w_rdx,
),
"6601C2",
"addw %dx, %ax, %dx",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Add,
RegMemImm::imm(10),
w_rdx,
),
"6683C20A",
"addw %dx, $10, %dx",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Add,
RegMemImm::imm(-512i32 as u32),
w_rdx,
),
"6681C200FE",
"addw %dx, $-512, %dx",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Sub,
RegMemImm::reg(rax),
w_r12,
),
"664129C4",
"subw %r12w, %ax, %r12w",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Xor,
RegMemImm::reg(r10),
w_rcx,
),
"664431D1",
"xorw %cx, %r10w, %cx",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::And,
RegMemImm::reg(r10),
w_r14,
),
"664521D6",
"andw %r14w, %r10w, %r14w",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::And,
RegMemImm::imm(10),
w_r14,
),
"664183E60A",
"andw %r14w, $10, %r14w",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::And,
RegMemImm::imm(-512i32 as u32),
w_r14,
),
"664181E600FE",
"andw %r14w, $-512, %r14w",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Mul,
RegMemImm::imm(10),
w_rax,
),
"666BC00A",
"imulw %ax, $10, %ax",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Mul,
RegMemImm::imm(-512i32 as u32),
w_rax,
),
"6669C000FE",
"imulw %ax, $-512, %ax",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Mul,
RegMemImm::imm(10),
w_r11,
),
"66456BDB0A",
"imulw %r11w, $10, %r11w",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Mul,
RegMemImm::imm(-512i32 as u32),
w_r11,
),
"664569DB00FE",
"imulw %r11w, $-512, %r11w",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Mul,
RegMemImm::reg(rdx),
w_rax,
),
"660FAFC2",
"imulw %ax, %dx, %ax",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Mul,
RegMemImm::reg(r12),
w_rax,
),
"66410FAFC4",
"imulw %ax, %r12w, %ax",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Mul,
RegMemImm::reg(rdx),
w_r11,
),
"66440FAFDA",
"imulw %r11w, %dx, %r11w",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size16,
AluRmiROpcode::Mul,
RegMemImm::reg(r12),
w_r11,
),
"66450FAFDC",
"imulw %r11w, %r12w, %r11w",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Add,
RegMemImm::imm(10),
w_rax,
),
"80C00A", // there is theoretically 040A as a valid encoding also
"addb %al, $10, %al",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Add,
RegMemImm::reg(rcx),
w_rax,
),
"00C8",
"addb %al, %cl, %al",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Add,
RegMemImm::reg(rsi),
w_rax,
),
"4000F0",
"addb %al, %sil, %al",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Add,
RegMemImm::reg(r11),
w_rax,
),
"4400D8",
"addb %al, %r11b, %al",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Add,
RegMemImm::reg(r15),
w_rax,
),
"4400F8",
"addb %al, %r15b, %al",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Sub,
RegMemImm::imm(10),
_w_rbp,
),
"4080ED0A",
"subb %bpl, $10, %bpl",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Sub,
RegMemImm::reg(rcx),
_w_rbp,
),
"4028CD",
"subb %bpl, %cl, %bpl",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Sub,
RegMemImm::reg(rsi),
_w_rbp,
),
"4028F5",
"subb %bpl, %sil, %bpl",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Sub,
RegMemImm::reg(r11),
_w_rbp,
),
"4428DD",
"subb %bpl, %r11b, %bpl",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Sub,
RegMemImm::reg(r15),
_w_rbp,
),
"4428FD",
"subb %bpl, %r15b, %bpl",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Xor,
RegMemImm::imm(10),
_w_r10,
),
"4180F20A",
"xorb %r10b, $10, %r10b",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Xor,
RegMemImm::reg(rcx),
_w_r10,
),
"4130CA",
"xorb %r10b, %cl, %r10b",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Xor,
RegMemImm::reg(rsi),
_w_r10,
),
"4130F2",
"xorb %r10b, %sil, %r10b",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Xor,
RegMemImm::reg(r11),
_w_r10,
),
"4530DA",
"xorb %r10b, %r11b, %r10b",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Xor,
RegMemImm::reg(r15),
_w_r10,
),
"4530FA",
"xorb %r10b, %r15b, %r10b",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::And,
RegMemImm::imm(10),
w_r15,
),
"4180E70A",
"andb %r15b, $10, %r15b",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::And,
RegMemImm::reg(rcx),
w_r15,
),
"4120CF",
"andb %r15b, %cl, %r15b",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::And,
RegMemImm::reg(rsi),
w_r15,
),
"4120F7",
"andb %r15b, %sil, %r15b",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::And,
RegMemImm::reg(r11),
w_r15,
),
"4520DF",
"andb %r15b, %r11b, %r15b",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::And,
RegMemImm::reg(r15),
w_r15,
),
"4520FF",
"andb %r15b, %r15b, %r15b",
));
// the 8-bit imul has rax as a fixed dst
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Mul,
RegMemImm::reg(rcx),
w_rax,
),
"F6E9",
"imulb %al, %cl, %al",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Mul,
RegMemImm::reg(rbp),
w_rax,
),
"40F6ED",
"imulb %al, %bpl, %al",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Mul,
RegMemImm::reg(r10),
w_rax,
),
"41F6EA",
"imulb %al, %r10b, %al",
));
insns.push((
Inst::alu_rmi_r(
OperandSize::Size8,
AluRmiROpcode::Mul,
RegMemImm::reg(r15),
w_rax,
),
"41F6EF",
"imulb %al, %r15b, %al",
));
// ========================================================
// AluRM
@@ -1654,6 +2072,68 @@ fn test_x64_emit() {
"xorq %rax, 0(%rbp)",
));
insns.push((
Inst::AluRM {
size: OperandSize::Size16,
op: AluRmiROpcode::Add,
src1_dst: Amode::imm_reg(0, rbp).into(),
src2: Gpr::new(rax).unwrap(),
},
"66014500",
"addw %ax, 0(%rbp)",
));
insns.push((
Inst::AluRM {
size: OperandSize::Size16,
op: AluRmiROpcode::Sub,
src1_dst: Amode::imm_reg(0, rbp).into(),
src2: Gpr::new(r12).unwrap(),
},
"6644296500",
"subw %r12w, 0(%rbp)",
));
insns.push((
Inst::AluRM {
size: OperandSize::Size8,
op: AluRmiROpcode::Add,
src1_dst: Amode::imm_reg(0, rbp).into(),
src2: Gpr::new(rax).unwrap(),
},
"004500",
"addb %al, 0(%rbp)",
));
insns.push((
Inst::AluRM {
size: OperandSize::Size8,
op: AluRmiROpcode::Sub,
src1_dst: Amode::imm_reg(0, rbp).into(),
src2: Gpr::new(rbp).unwrap(),
},
"40286D00",
"subb %bpl, 0(%rbp)",
));
insns.push((
Inst::AluRM {
size: OperandSize::Size8,
op: AluRmiROpcode::Xor,
src1_dst: Amode::imm_reg(0, rbp).into(),
src2: Gpr::new(r10).unwrap(),
},
"44305500",
"xorb %r10b, 0(%rbp)",
));
insns.push((
Inst::AluRM {
size: OperandSize::Size8,
op: AluRmiROpcode::And,
src1_dst: Amode::imm_reg(0, rbp).into(),
src2: Gpr::new(r15).unwrap(),
},
"44207D00",
"andb %r15b, 0(%rbp)",
));
// ========================================================
// UnaryRmR
@@ -1864,6 +2344,59 @@ fn test_x64_emit() {
"mul %rax, %rdi, %rax, %rdx",
));
// ========================================================
// UMulLo
insns.push((
Inst::umul_lo(OperandSize::Size64, RegMem::reg(regs::rdx())),
"48F7E2",
"mulq %rax, %rdx, %rax",
));
insns.push((
Inst::umul_lo(OperandSize::Size64, RegMem::reg(regs::r12())),
"49F7E4",
"mulq %rax, %r12, %rax",
));
insns.push((
Inst::umul_lo(OperandSize::Size32, RegMem::reg(regs::rdx())),
"F7E2",
"mull %eax, %edx, %eax",
));
insns.push((
Inst::umul_lo(OperandSize::Size32, RegMem::reg(regs::r12())),
"41F7E4",
"mull %eax, %r12d, %eax",
));
insns.push((
Inst::umul_lo(OperandSize::Size16, RegMem::reg(regs::rdx())),
"66F7E2",
"mulw %ax, %dx, %ax",
));
insns.push((
Inst::umul_lo(OperandSize::Size16, RegMem::reg(regs::r12())),
"6641F7E4",
"mulw %ax, %r12w, %ax",
));
insns.push((
Inst::umul_lo(OperandSize::Size8, RegMem::reg(regs::rdx())),
"F6E2",
"mulb %al, %dl, %al",
));
insns.push((
Inst::umul_lo(OperandSize::Size8, RegMem::reg(regs::rdi())),
"40F6E7",
"mulb %al, %dil, %al",
));
insns.push((
Inst::umul_lo(OperandSize::Size8, RegMem::reg(regs::r9())),
"41F6E1",
"mulb %al, %r9b, %al",
));
insns.push((
Inst::umul_lo(OperandSize::Size8, RegMem::reg(regs::r12())),
"41F6E4",
"mulb %al, %r12b, %al",
));
// ========================================================
// Imm_R
//

View File

@@ -101,6 +101,7 @@ impl Inst {
| Inst::MovsxRmR { .. }
| Inst::MovzxRmR { .. }
| Inst::MulHi { .. }
| Inst::UMulLo { .. }
| Inst::Neg { .. }
| Inst::Not { .. }
| Inst::Nop { .. }
@@ -180,7 +181,6 @@ impl Inst {
src: RegMemImm,
dst: Writable<Reg>,
) -> Self {
debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64]));
src.assert_regclass_is(RegClass::Int);
debug_assert!(dst.to_reg().class() == RegClass::Int);
Self::AluRmiR {
@@ -657,6 +657,7 @@ impl PrettyPrint for Inst {
.to_string()
}
#[allow(dead_code)]
fn suffix_lqb(size: OperandSize) -> String {
match size {
OperandSize::Size32 => "l",
@@ -691,7 +692,7 @@ impl PrettyPrint for Inst {
let src2 = src2.pretty_print(size_bytes, allocs);
format!(
"{} {}, {}, {}",
ljustify2(op.to_string(), suffix_lqb(*size)),
ljustify2(op.to_string(), suffix_bwlq(*size)),
src1,
src2,
dst
@@ -716,7 +717,7 @@ impl PrettyPrint for Inst {
let src1_dst = src1_dst.pretty_print(size_bytes, allocs);
format!(
"{} {}, {}",
ljustify2(op.to_string(), suffix_lqb(*size)),
ljustify2(op.to_string(), suffix_bwlq(*size)),
src2,
src1_dst,
)
@@ -849,6 +850,24 @@ impl PrettyPrint for Inst {
)
}
Inst::UMulLo {
size,
src1,
src2,
dst,
} => {
let src1 = pretty_print_reg(src1.to_reg(), size.to_bytes(), allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
let src2 = src2.pretty_print(size.to_bytes(), allocs);
format!(
"{} {}, {}, {}",
ljustify2("mul".to_string(), suffix_bwlq(*size)),
src1,
src2,
dst,
)
}
Inst::CheckedSRemSeq {
size,
divisor,
@@ -1854,11 +1873,23 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
// method above.
match inst {
Inst::AluRmiR {
src1, src2, dst, ..
size,
op,
src1,
src2,
dst,
..
} => {
collector.reg_use(src1.to_reg());
collector.reg_reuse_def(dst.to_writable_reg(), 0);
src2.get_operands(collector);
if *size == OperandSize::Size8 && *op == AluRmiROpcode::Mul {
// 8-bit imul has RAX as a fixed input/output
collector.reg_fixed_use(src1.to_reg(), regs::rax());
collector.reg_fixed_def(dst.to_writable_reg(), regs::rax());
src2.get_operands(collector);
} else {
collector.reg_use(src1.to_reg());
collector.reg_reuse_def(dst.to_writable_reg(), 0);
src2.get_operands(collector);
}
}
Inst::AluConstOp { dst, .. } => collector.reg_def(dst.to_writable_reg()),
Inst::AluRM { src1_dst, src2, .. } => {
@@ -1925,6 +1956,20 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_fixed_def(dst_hi.to_writable_reg(), regs::rdx());
src2.get_operands(collector);
}
Inst::UMulLo {
size,
src1,
src2,
dst,
..
} => {
collector.reg_fixed_use(src1.to_reg(), regs::rax());
collector.reg_fixed_def(dst.to_writable_reg(), regs::rax());
if *size != OperandSize::Size8 {
collector.reg_clobbers(PRegSet::empty().with(regs::gpr_preg(regs::ENC_RDX)));
}
src2.get_operands(collector);
}
Inst::SignExtendData { size, src, dst } => {
match size {
OperandSize::Size8 => {

View File

@@ -111,6 +111,87 @@
(output_pair (value_regs_get results 0)
(value_regs_get results 1))))
;;;; Helpers for `*_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl construct_overflow_op (CC ProducesFlags) InstOutput)
(rule (construct_overflow_op cc inst)
(let ((results ValueRegs (with_flags inst
(x64_setcc_paired cc))))
(output_pair (value_regs_get results 0)
(value_regs_get results 1))))
(decl construct_overflow_op_alu (Type CC AluRmiROpcode Gpr GprMemImm) InstOutput)
(rule (construct_overflow_op_alu ty cc alu_op src1 src2)
(construct_overflow_op cc (x64_alurmi_with_flags_paired alu_op ty src1 src2)))
;; This essentially creates
;; alu_<op1> x_lo, y_lo
;; alu_<op2> x_hi, y_hi
;; set<cc> r8
(decl construct_overflow_op_alu_128 (CC AluRmiROpcode AluRmiROpcode Value Value) InstOutput)
(rule (construct_overflow_op_alu_128 cc op1 op2 x y)
;; Get the high/low registers for `x`.
(let ((x_regs ValueRegs x)
(x_lo Gpr (value_regs_get_gpr x_regs 0))
(x_hi Gpr (value_regs_get_gpr x_regs 1)))
;; Get the high/low registers for `y`.
(let ((y_regs ValueRegs y)
(y_lo Gpr (value_regs_get_gpr y_regs 0))
(y_hi Gpr (value_regs_get_gpr y_regs 1)))
(let ((lo_inst ProducesFlags (x64_alurmi_with_flags_paired op1 $I64 x_lo y_lo))
(hi_inst ConsumesAndProducesFlags (x64_alurmi_with_flags_chained op2 $I64 x_hi y_hi))
(of_inst ConsumesFlags (x64_setcc_paired cc))
(result MultiReg (with_flags_chained lo_inst hi_inst of_inst)))
(multi_reg_to_pair_and_single result)))))
;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 1 (lower (uadd_overflow x y @ (value_type (fits_in_64 ty))))
(construct_overflow_op_alu ty (CC.B) (AluRmiROpcode.Add) x y))
;; i128 is lowered into an add followed by an adc
(rule 0 (lower (uadd_overflow x y @ (value_type $I128)))
(construct_overflow_op_alu_128 (CC.B) (AluRmiROpcode.Add) (AluRmiROpcode.Adc) x y))
;;;; Rules for `sadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 1 (lower (sadd_overflow x y @ (value_type (fits_in_64 ty))))
(construct_overflow_op_alu ty (CC.O) (AluRmiROpcode.Add) x y))
(rule 0 (lower (sadd_overflow x y @ (value_type $I128)))
(construct_overflow_op_alu_128 (CC.O) (AluRmiROpcode.Add) (AluRmiROpcode.Adc) x y))
;;;; Rules for `usub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 1 (lower (usub_overflow x y @ (value_type (fits_in_64 ty))))
(construct_overflow_op_alu ty (CC.B) (AluRmiROpcode.Sub) x y))
(rule 0 (lower (usub_overflow x y @ (value_type $I128)))
(construct_overflow_op_alu_128 (CC.B) (AluRmiROpcode.Sub) (AluRmiROpcode.Sbb) x y))
;;;; Rules for `ssub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 1 (lower (ssub_overflow x y @ (value_type (fits_in_64 ty))))
(construct_overflow_op_alu ty (CC.O) (AluRmiROpcode.Sub) x y))
(rule 0 (lower (ssub_overflow x y @ (value_type $I128)))
(construct_overflow_op_alu_128 (CC.O) (AluRmiROpcode.Sub) (AluRmiROpcode.Sbb) x y))
;;;; Rules for `umul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 2 (lower (umul_overflow x y @ (value_type (fits_in_64 ty))))
(construct_overflow_op (CC.O) (x64_umullo_with_flags_paired ty x y)))
;;;; Rules for `smul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 2 (lower (smul_overflow x y @ (value_type (ty_int_ref_16_to_64 ty))))
(construct_overflow_op_alu ty (CC.O) (AluRmiROpcode.Mul) x y))
;; There is no 8-bit imul with an immediate operand, so we need to put it in a register or memory
(rule 1 (lower (smul_overflow x y @ (value_type $I8)))
(construct_overflow_op (CC.O) (x64_alurmi_with_flags_paired (AluRmiROpcode.Mul) $I8 x (reg_mem_to_reg_mem_imm (put_in_reg_mem y)))))
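`construct_overflow_op_alu_128` splits the 128-bit operation into a flag-producing low-half op, a flag-consuming-and-producing high-half op (`adc`/`sbb`), and a final `setcc`. A hedged Rust model of the unsigned-add case, using explicit carry propagation rather than the real instruction pair:

```rust
// Illustrative model of the i128 `uadd_overflow` lowering:
// add the low halves, propagate the carry into the high halves with an
// add-with-carry, and take the overflow flag from the final carry
// (the `setb` / CC.B step).
fn uadd128_overflow(x: u128, y: u128) -> (u128, bool) {
    let (x_lo, x_hi) = (x as u64, (x >> 64) as u64);
    let (y_lo, y_hi) = (y as u64, (y >> 64) as u64);

    let (lo, carry_lo) = x_lo.overflowing_add(y_lo);    // add (sets CF)
    let (hi, c1) = x_hi.overflowing_add(y_hi);          // adc, part 1
    let (hi, c2) = hi.overflowing_add(carry_lo as u64); // adc, part 2
    let of = c1 || c2;                                  // final CF -> setb

    (((hi as u128) << 64) | lo as u128, of)
}

fn main() {
    assert_eq!(uadd128_overflow(u128::MAX, 1), u128::MAX.overflowing_add(1));
    assert_eq!(uadd128_overflow(1 << 100, 1 << 100), (1u128 << 101, false));
    assert_eq!(uadd128_overflow(u128::MAX, u128::MAX), u128::MAX.overflowing_add(u128::MAX));
}
```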
;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (multi_lane 8 16)

View File

@@ -355,6 +355,14 @@ macro_rules! isle_common_prelude_methods {
}
}
#[inline]
fn ty_int_ref_16_to_64(&mut self, ty: Type) -> Option<Type> {
match ty {
I16 | I32 | I64 | R64 => Some(ty),
_ => None,
}
}
#[inline]
fn ty_int(&mut self, ty: Type) -> Option<Type> {
ty.is_int().then(|| ty)

View File

@@ -361,6 +361,10 @@
(decl ty_int_ref_64 (Type) Type)
(extern extractor ty_int_ref_64 ty_int_ref_64)
;; An extractor that matches int or reference types of at least 16 bits and at most 64 bits.
(decl ty_int_ref_16_to_64 (Type) Type)
(extern extractor ty_int_ref_16_to_64 ty_int_ref_16_to_64)
;; An extractor that only matches integers.
(decl ty_int (Type) Type)
(extern extractor ty_int ty_int)

View File

@@ -17,6 +17,16 @@
;; (Mutable) builder to incrementally construct an `InstOutput`.
(type InstOutputBuilder extern (enum))
;; Type to hold multiple Regs
(type MultiReg
(enum
(Empty)
(One (a Reg))
(Two (a Reg) (b Reg))
(Three (a Reg) (b Reg) (c Reg))
(Four (a Reg) (b Reg) (c Reg) (d Reg))
))
;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type Reg (primitive Reg))
@@ -144,6 +154,22 @@
(decl preg_to_reg (PReg) Reg)
(extern constructor preg_to_reg preg_to_reg)
;; Convert a MultiReg with three registers into an InstOutput containing
;; one ValueRegs containing the first two regs and one containing the third reg
(decl multi_reg_to_pair_and_single (MultiReg) InstOutput)
(rule (multi_reg_to_pair_and_single (MultiReg.Three a b c))
(output_pair (value_regs a b) c))
;; Convert a MultiReg with two registers into an InstOutput containing one ValueRegs with both regs
(decl multi_reg_to_pair (MultiReg) InstOutput)
(rule (multi_reg_to_pair (MultiReg.Two a b))
(value_regs a b))
;; Convert a MultiReg with one register into an InstOutput containing one ValueRegs with the register
(decl multi_reg_to_single (MultiReg) InstOutput)
(rule (multi_reg_to_single (MultiReg.One a))
(value_reg a))
;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type MachLabel (primitive MachLabel))
@@ -335,10 +361,15 @@
(ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg))))
;; Concatenate two flag-producing instructions into a single `ProducesFlags`.
(decl produces_flags_append (ProducesFlags MInst) ProducesFlags)
(rule (produces_flags_append (ProducesFlags.ProducesFlagsSideEffect inst1) inst2)
(decl produces_flags_concat (ProducesFlags ProducesFlags) ProducesFlags)
(rule (produces_flags_concat (ProducesFlags.ProducesFlagsSideEffect inst1) (ProducesFlags.ProducesFlagsSideEffect inst2))
(ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2))
;; Newtype wrapper around `MInst` for instructions that consume and produce flags
(type ConsumesAndProducesFlags (enum
(SideEffect (inst MInst))
(ReturnsReg (inst MInst) (result Reg))))
;; Newtype wrapper around `MInst` for instructions that consume flags.
;;
;; Variant determines how result is given when combined with a
@@ -528,6 +559,250 @@
(ConsumesFlags.ConsumesFlagsSideEffect c))
(SideEffectNoResult.Inst3 p1 p2 c))
;; Combine a flag-producing instruction, a flag-consuming-and-producing instruction, and a flag-consuming instruction, allowing more than two results to be returned
(decl with_flags_chained (ProducesFlags ConsumesAndProducesFlags ConsumesFlags) MultiReg)
;; ProducesFlags.SideEffect + ConsumesAndProducesFlags.SideEffect with all possible ConsumesFlags options
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsSideEffect consume_inst))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.Empty)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.Empty)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsReturnsReg consume_inst reg))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.One reg)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.Two (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2))
(_ Unit (emit consume_inst3))
(_ Unit (emit consume_inst4)))
(MultiReg.Two (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
;; ProducesFlags.ReturnsReg + ConsumesAndProducesFlags.SideEffect with all possible ConsumesFlags options
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsSideEffect consume_inst))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.One prod_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.One prod_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.Two prod_result consume_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.Three prod_result (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.SideEffect middle_inst)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2))
(_ Unit (emit consume_inst3))
(_ Unit (emit consume_inst4)))
(MultiReg.Three prod_result (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
;; ProducesFlags.SideEffect + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumesFlags options
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsSideEffect consume_inst))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.One middle_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.One middle_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.Two middle_result consume_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.Three middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2))
(_ Unit (emit consume_inst3))
(_ Unit (emit consume_inst4)))
(MultiReg.Three middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
;; ProducesFlags.ReturnsReg + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumesFlags options
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsSideEffect consume_inst))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.Two prod_result middle_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.Two prod_result middle_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.Three prod_result middle_result consume_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2))
(_ Unit (emit consume_inst3))
(_ Unit (emit consume_inst4)))
(MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
;; ProducesFlags.ReturnsResultWithConsumer + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumesFlags options
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsSideEffect consume_inst))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.Two prod_result middle_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.Two prod_result middle_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.Three prod_result middle_result consume_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consume_inst consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst)))
(MultiReg.Three prod_result middle_result consume_result)))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2)))
(MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result)
(ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result)
(ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result))
(let ((_ Unit (emit prod_inst))
(_ Unit (emit middle_inst))
(_ Unit (emit consume_inst1))
(_ Unit (emit consume_inst2))
(_ Unit (emit consume_inst3))
(_ Unit (emit consume_inst4)))
(MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1))))
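Each of these rules encodes the same contract: the flag-producing instruction, the middle instruction that both consumes and produces flags (the adc/adcs-style step in a 128-bit add or subtract), and the final consumer are emitted back-to-back so nothing can clobber the flags register in between, and the results of interest are packed into a `MultiReg`. A rough Rust sketch of that contract for the three-result case, with placeholder types standing in for the real ISLE/VCode ones:

```rust
// Placeholder stand-ins for the real ISLE/VCode types; only the shape matters here.
#[derive(Clone, Copy)]
struct Inst(&'static str);
#[derive(Clone, Copy, Debug)]
struct Reg(u8);

// The real MultiReg also has Two and Four variants, as used in the rules above.
#[derive(Debug)]
enum MultiReg {
    Three(Reg, Reg, Reg),
}

fn emit(inst: Inst) {
    // The real helper appends to the lowering context's instruction buffer.
    println!("emit {}", inst.0);
}

// Mirrors the ProducesFlagsReturnsReg + ReturnsReg + ConsumesFlagsReturnsReg rule:
// producer, middle, and consumer are emitted consecutively, so the flags written
// by `prod` (and rewritten by `middle`) survive until `consume` reads them.
fn with_flags_chained_sketch(
    (prod, prod_result): (Inst, Reg),
    (middle, middle_result): (Inst, Reg),
    (consume, consume_result): (Inst, Reg),
) -> MultiReg {
    emit(prod);
    emit(middle);
    emit(consume);
    MultiReg::Three(prod_result, middle_result, consume_result)
}

fn main() {
    // E.g. an aarch64-flavoured 128-bit add: adds / adcs / cset.
    let regs = with_flags_chained_sketch(
        (Inst("adds"), Reg(0)),
        (Inst("adcs"), Reg(1)),
        (Inst("cset"), Reg(2)),
    );
    println!("{regs:?}");
}
```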
;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; This definition should be kept up to date with the values defined in


@@ -0,0 +1,85 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target x86_64
target aarch64
function %saddof_i128(i128, i128) -> i128, i8 {
block0(v0: i128,v1: i128):
v2, v3 = sadd_overflow v0, v1
return v2, v3
}
; run: %saddof_i128(0, 0) == [0, 0]
; run: %saddof_i128(1, 0) == [1, 0]
; run: %saddof_i128(1, 1) == [2, 0]
; run: %saddof_i128(1, -1) == [0, 0]
; run: %saddof_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == [-1, 0]
; run: %saddof_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, 1) == [0x1_00000000_00000000, 0]
; run: %saddof_i128(-1, 1) == [0, 0]
; run: %saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == [-2, 1]
; run: %saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [-1, 0]
; run: %saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [0, 0]
; run: %saddof_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1]
; run: %saddof_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [0x80000000_00000000_00000000_00000000, 0]
; run: %saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == [0x80000000_00000000_00000000_00000000, 1]
; run: %saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFE, 1) == [0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0]
; run: %saddof_i128(0x01234567_89ABCDEF_01234567_89ABCDEF, 0xFEDCBA98_76543210_FEDCBA98_76543210) == [-1, 0]
; run: %saddof_i128(0x06060606_06060606_A00A00A0_0A00A00A, 0x30303030_30303030_0BB0BB0B_B0BB0BB0) == [0x36363636_36363636_ABBABBAB_BABBABBA, 0]
; run: %saddof_i128(0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111_1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF, 0]
function %saddof_i64(i64, i64) -> i64, i8 {
block0(v0: i64,v1: i64):
v2, v3 = sadd_overflow v0, v1
return v2, v3
}
; run: %saddof_i64(0, 0) == [0, 0]
; run: %saddof_i64(0, 1) == [1, 0]
; run: %saddof_i64(-1, 0) == [-1, 0]
; run: %saddof_i64(-1, 1) == [0, 0]
; run: %saddof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [-2, 1]
; run: %saddof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000000) == [-1, 0]
; run: %saddof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000001) == [0, 0]
; run: %saddof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [-1, 0]
; run: %saddof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543220) == [0xF, 0]
; run: %saddof_i64(0xA00A00A0_0A00A00A, 0x0BB0BB0B_B0BB0BB0) == [0xABBABBAB_BABBABBA, 0]
; run: %saddof_i64(0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0]
function %saddof_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
v2, v3 = sadd_overflow v0, v1
return v2, v3
}
; run: %saddof_i8(0, 1) == [1, 0]
; run: %saddof_i8(100, 27) == [127, 0]
; run: %saddof_i8(100, -20) == [80, 0]
; run: %saddof_i8(100, 28) == [-128, 1]
; run: %saddof_i8(-128, -128) == [0, 1]
; run: %saddof_i8(-128, -1) == [0x7F, 1]
; run: %saddof_i8(-127, -1) == [-128, 0]
; run: %saddof_i8(127, 1) == [0x80, 1]
function %saddof_i16(i16, i16) -> i16, i8 {
block0(v0: i16, v1: i16):
v2, v3 = sadd_overflow v0, v1
return v2, v3
}
; run: %saddof_i16(0, 1) == [1, 0]
; run: %saddof_i16(100, 27) == [127, 0]
; run: %saddof_i16(100, 28) == [128, 0]
; run: %saddof_i16(32000, 767) == [32767, 0]
; run: %saddof_i16(32000, 768) == [-32768, 1]
; run: %saddof_i16(-32767, -1) == [-32768, 0]
; run: %saddof_i16(-32768, -1) == [32767, 1]
function %saddof_i32(i32, i32) -> i32, i8 {
block0(v0: i32, v1: i32):
v2, v3 = sadd_overflow v0, v1
return v2, v3
}
; run: %saddof_i32(0, 1) == [1, 0]
; run: %saddof_i32(100, 27) == [127, 0]
; run: %saddof_i32(100, 28) == [128, 0]
; run: %saddof_i32(0x7FFF_FFFE, 1) == [0x7FFF_FFFF, 0]
; run: %saddof_i32(0x7FFF_FFFF, 1) == [0x8000_0000, 1]
; run: %saddof_i32(0x8000_0000, 0xFFFF_FFFF) == [0x7FFF_FFFF, 1]
; run: %saddof_i32(0x8000_0001, 0xFFFF_FFFF) == [0x8000_0000, 0]
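The expectations above line up with Rust's native overflowing arithmetic on each concrete width; a quick standard-library cross-check of a few of the i8 and i32 cases:

```rust
fn main() {
    // %saddof_i8(100, 28) == [-128, 1]
    assert_eq!(100i8.overflowing_add(28), (-128, true));
    // %saddof_i8(-128, -1) == [0x7F, 1]
    assert_eq!((-128i8).overflowing_add(-1), (127, true));
    // %saddof_i32(0x7FFF_FFFE, 1) == [0x7FFF_FFFF, 0]
    assert_eq!(0x7FFF_FFFEi32.overflowing_add(1), (i32::MAX, false));
    // %saddof_i32(0x7FFF_FFFF, 1) == [0x8000_0000, 1]
    assert_eq!(0x7FFF_FFFFi32.overflowing_add(1), (i32::MIN, true));
    println!("sadd_overflow expectations check out");
}
```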


@@ -0,0 +1,76 @@
test interpret
test run
target x86_64
target aarch64
function %smulof_i64(i64, i64) -> i64, i8 {
block0(v0: i64, v1: i64):
v2, v3 = smul_overflow v0, v1
return v2, v3
}
; run: %smulof_i64(0, 1) == [0, 0]
; run: %smulof_i64(1, 1) == [1, 0]
; run: %smulof_i64(0xFFFFFFFF_FFFFFFFF, 2) == [0xFFFFFFFF_FFFFFFFE, 0]
; run: %smulof_i64(0x7FFFFFFF_FFFFFFFF, 2) == [0xFFFFFFFF_FFFFFFFE, 1]
; run: %smulof_i64(1, -1) == [-1, 0]
; run: %smulof_i64(2, 2) == [4, 0]
; run: %smulof_i64(2, -2) == [-4, 0]
; run: %smulof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [1, 1]
; run: %smulof_i64(0x80000000_00000000, 0x7FFFFFFF_FFFFFFFF) == [0x80000000_00000000, 1]
; run: %smulof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x2236D88F_E5618CF0, 1]
; run: %smulof_i64(0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF) == [0xDB6B1E48_19BA1112, 1]
function %smulof_i32(i32, i32) -> i32, i8 {
block0(v0: i32, v1: i32):
v2, v3 = smul_overflow v0, v1
return v2, v3
}
; run: %smulof_i32(0, 1) == [0, 0]
; run: %smulof_i32(1, 1) == [1, 0]
; run: %smulof_i32(0xFFFFFFFF, 2) == [0xFFFFFFFE, 0]
; run: %smulof_i32(0x7FFFFFFF, 2) == [0xFFFFFFFE, 1]
; run: %smulof_i32(1, -1) == [-1, 0]
; run: %smulof_i32(2, 2) == [4, 0]
; run: %smulof_i32(2, -2) == [-4, 0]
; run: %smulof_i32(0x7FFFFFFF, 0x7FFFFFFF) == [1, 1]
; run: %smulof_i32(0x80000000, 0x7FFFFFFF) == [0x80000000, 1]
; run: %smulof_i32(0x01234567, 0xFEDCBA98) == [0x23E20B28, 1]
; run: %smulof_i32(0xC0FFEEEE, 0xDECAFFFF) == [0x19BA1112, 1]
function %smulof_i16(i16, i16) -> i16, i8 {
block0(v0: i16, v1: i16):
v2, v3 = smul_overflow v0, v1
return v2, v3
}
; run: %smulof_i16(0, 1) == [0, 0]
; run: %smulof_i16(1, 1) == [1, 0]
; run: %smulof_i16(0xFFFF, 2) == [0xFFFE, 0]
; run: %smulof_i16(0x7FFF, 2) == [0xFFFE, 1]
; run: %smulof_i16(1, -1) == [-1, 0]
; run: %smulof_i16(2, 2) == [4, 0]
; run: %smulof_i16(2, -2) == [-4, 0]
; run: %smulof_i16(0x7FFF, 0x7FFF) == [1, 1]
; run: %smulof_i16(0x8000, 0x7FFF) == [0x8000, 1]
; run: %smulof_i16(0x0123, 0xFEDC) == [0xB414, 1]
; run: %smulof_i16(0xC0FF, 0xDECA) == [0x6B36, 1]
function %smulof_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
v2, v3 = smul_overflow v0, v1
return v2, v3
}
; run: %smulof_i8(0, 1) == [0, 0]
; run: %smulof_i8(1, 1) == [1, 0]
; run: %smulof_i8(0xFF, 2) == [0xFE, 0]
; run: %smulof_i8(0x7F, 2) == [0xFE, 1]
; run: %smulof_i8(1, -1) == [-1, 0]
; run: %smulof_i8(2, 2) == [4, 0]
; run: %smulof_i8(2, -2) == [-4, 0]
; run: %smulof_i8(0x7F, 0x7F) == [1, 1]
; run: %smulof_i8(0x80, 0x7F) == [0x80, 1]
; run: %smulof_i8(0x01, 0xFE) == [0xFE, 0]
; run: %smulof_i8(0xC0, 0xDE) == [0x80, 1]


@@ -0,0 +1,91 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target x86_64
target aarch64
function %ssubof_i128(i128, i128) -> i128, i8 {
block0(v0: i128,v1: i128):
v2, v3 = ssub_overflow v0, v1
return v2, v3
}
; run: %ssubof_i128(0, 0) == [0, 0]
; run: %ssubof_i128(0, 1) == [-1, 0]
; run: %ssubof_i128(-1, 0) == [-1, 0]
; run: %ssubof_i128(-1, 1) == [-2, 0]
; run: %ssubof_i128(-1, -2) == [1, 0]
; run: %ssubof_i128(0x00000000_00000001_00000000_00000000, 1) == [0xFFFFFFFF_FFFFFFFF, 0]
; run: %ssubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == [0, 0]
; run: %ssubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [-1, 1]
; run: %ssubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [-2, 1]
; run: %ssubof_i128(0, 0x80000000_00000000_00000000_00000000) == [0x80000000_00000000_00000000_00000000, 1]
; run: %ssubof_i128(0x80000000_00000000_00000000_00000000, 1) == [0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1]
; run: %ssubof_i128(0x80000000_00000000_00000000_00000001, 1) == [0x80000000_00000000_00000000_00000000, 0]
function %ssubof_i64(i64, i64) -> i64, i8 {
block0(v0: i64,v1: i64):
v2, v3 = ssub_overflow v0, v1
return v2, v3
}
; run: %ssubof_i64(0, 0) == [0, 0]
; run: %ssubof_i64(0, 1) == [-1, 0]
; run: %ssubof_i64(-1, 0) == [-1, 0]
; run: %ssubof_i64(-1, 1) == [-2, 0]
; run: %ssubof_i64(-1, -2) == [1, 0]
; run: %ssubof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [0, 0]
; run: %ssubof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000000) == [-1, 1]
; run: %ssubof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000001) == [-2, 1]
; run: %ssubof_i64(0, 0x80000000_00000000) == [0x80000000_00000000, 1]
; run: %ssubof_i64(0x80000000_00000000, 1) == [0x7FFFFFFF_FFFFFFFF, 1]
; run: %ssubof_i64(0x80000000_00000001, 1) == [0x80000000_00000000, 0]
; run: %ssubof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x0246_8ACF_1357_9BDF, 0]
; run: %ssubof_i64(0xFEDCBA98_76543220, 0x01234567_89ABCDEF) == [0xFDB9_7530_ECA8_6431, 0]
function %ssubof_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
v2, v3 = ssub_overflow v0, v1
return v2, v3
}
; run: %ssubof_i8(0, 1) == [-1, 0]
; run: %ssubof_i8(100, 20) == [80, 0]
; run: %ssubof_i8(100, -20) == [120, 0]
; run: %ssubof_i8(0x80, 0x80) == [0, 0]
; run: %ssubof_i8(0x7F, 0x80) == [0xFF, 1]
; run: %ssubof_i8(0, 0x80) == [0x80, 1]
; run: %ssubof_i8(0x80, 0x80) == [0, 0]
; run: %ssubof_i8(0x80, 0x01) == [0x7F, 1]
; run: %ssubof_i8(0x7F, 0xFF) == [0x80, 1]
; run: %ssubof_i8(0x7E, 0xFF) == [0x7F, 0]
; run: %ssubof_i8(0x80, 1) == [0x7F, 1]
function %ssubof_i16(i16, i16) -> i16, i8 {
block0(v0: i16, v1: i16):
v2, v3 = ssub_overflow v0, v1
return v2, v3
}
; run: %ssubof_i16(0, 1) == [-1, 0]
; run: %ssubof_i16(100, 20) == [80, 0]
; run: %ssubof_i16(0xFFFF, 0xFFFF) == [0, 0]
; run: %ssubof_i16(0xFFFE, 0xFFFF) == [-1, 0]
; run: %ssubof_i16(0xFFFE, 0xFE) == [0xFF00, 0]
; run: %ssubof_i16(0, 0x8000) == [0x8000, 1]
; run: %ssubof_i16(0x8000, 0x0001) == [0x7FFF, 1]
; run: %ssubof_i16(0x8000, 0xFFFF) == [0x8001, 0]
; run: %ssubof_i16(0x7FFF, 0xFFFF) == [0x8000, 1]
; run: %ssubof_i16(0x7FFE, 0xFFFF) == [0x7FFF, 0]
function %ssubof_i32(i32, i32) -> i32, i8 {
block0(v0: i32, v1: i32):
v2, v3 = ssub_overflow v0, v1
return v2, v3
}
; run: %ssubof_i32(0, 1) == [-1, 0]
; run: %ssubof_i32(100, 20) == [80, 0]
; run: %ssubof_i32(0xFFFF_FFFF, 0xFFFF_FFFF) == [0, 0]
; run: %ssubof_i32(0, 0x8000_0000) == [0x8000_0000, 1]
; run: %ssubof_i32(0x8000_0000, 0x0000_0001) == [0x7FFF_FFFF, 1]
; run: %ssubof_i32(0x8000_0000, 0xFFFF_FFFF) == [0x8000_0001, 0]
; run: %ssubof_i32(0xFFFF_FFFE, 0xFFFF_FFFF) == [-1, 0]
; run: %ssubof_i32(0xFFFF_FFFE, 0xFE) == [0xFFFF_FF00, 0]
; run: %ssubof_i32(0x7FFF_FFFF, 0xFFFF_FFFF) == [0x8000_0000, 1]
; run: %ssubof_i32(0x7FFF_FFFE, 0xFFFF_FFFF) == [0x7FFF_FFFF, 0]


@@ -0,0 +1,77 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target x86_64
target aarch64
function %uaddof_i128(i128, i128) -> i128, i8 {
block0(v0: i128,v1: i128):
v2, v3 = uadd_overflow v0, v1
return v2, v3
}
; run: %uaddof_i128(0, 0) == [0, 0]
; run: %uaddof_i128(1, 0) == [1, 0]
; run: %uaddof_i128(1, 1) == [2, 0]
; run: %uaddof_i128(1, -1) == [0, 1]
; run: %uaddof_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == [-1, 0]
; run: %uaddof_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, 1) == [0x1_00000000_00000000, 0]
; run: %uaddof_i128(-1, 1) == [0, 1]
; run: %uaddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == [-2, 0]
; run: %uaddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [-1, 0]
; run: %uaddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [0, 1]
; run: %uaddof_i128(0x01234567_89ABCDEF_01234567_89ABCDEF, 0xFEDCBA98_76543210_FEDCBA98_76543210) == [-1, 0]
; run: %uaddof_i128(0x06060606_06060606_A00A00A0_0A00A00A, 0x30303030_30303030_0BB0BB0B_B0BB0BB0) == [0x36363636_36363636_ABBABBAB_BABBABBA, 0]
; run: %uaddof_i128(0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111_1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF, 0]
function %uaddof_i64(i64, i64) -> i64, i8 {
block0(v0: i64,v1: i64):
v2, v3 = uadd_overflow v0, v1
return v2, v3
}
; run: %uaddof_i64(0, 0) == [0, 0]
; run: %uaddof_i64(0, 1) == [1, 0]
; run: %uaddof_i64(-1, 0) == [-1, 0]
; run: %uaddof_i64(-1, 1) == [0, 1]
; run: %uaddof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [-2, 0]
; run: %uaddof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000000) == [-1, 0]
; run: %uaddof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000001) == [0, 1]
; run: %uaddof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [-1, 0]
; run: %uaddof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543220) == [0xF, 1]
; run: %uaddof_i64(0xA00A00A0_0A00A00A, 0x0BB0BB0B_B0BB0BB0) == [0xABBABBAB_BABBABBA, 0]
; run: %uaddof_i64(0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0]
function %uaddof_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
v2, v3 = uadd_overflow v0, v1
return v2, v3
}
; run: %uaddof_i8(0, 1) == [1, 0]
; run: %uaddof_i8(100, 27) == [127, 0]
; run: %uaddof_i8(100, -20) == [80, 1]
; run: %uaddof_i8(100, 28) == [-128, 0]
; run: %uaddof_i8(-128, -128) == [0, 1]
; run: %uaddof_i8(127, 1) == [0x80, 0]
function %uaddof_i16(i16, i16) -> i16, i8 {
block0(v0: i16, v1: i16):
v2, v3 = uadd_overflow v0, v1
return v2, v3
}
; run: %uaddof_i16(0, 1) == [1, 0]
; run: %uaddof_i16(100, 27) == [127, 0]
; run: %uaddof_i16(100, 28) == [128, 0]
; run: %uaddof_i16(32000, 767) == [32767, 0]
; run: %uaddof_i16(32000, 768) == [-32768, 0]
; run: %uaddof_i16(65000, 535) == [65535, 0]
; run: %uaddof_i16(65000, 536) == [0, 1]
function %uaddof_i32(i32, i32) -> i32, i8 {
block0(v0: i32, v1: i32):
v2, v3 = uadd_overflow v0, v1
return v2, v3
}
; run: %uaddof_i32(0, 1) == [1, 0]
; run: %uaddof_i32(100, 27) == [127, 0]
; run: %uaddof_i32(100, 28) == [128, 0]
; run: %uaddof_i32(3000000000, 1294967295) == [-1, 0]
; run: %uaddof_i32(3000000000, 1294967296) == [0, 1]


@@ -0,0 +1,68 @@
test interpret
test run
target x86_64
target aarch64
function %umulof_i64(i64, i64) -> i64, i8 {
block0(v0: i64, v1: i64):
v2, v3 = umul_overflow v0, v1
return v2, v3
}
; run: %umulof_i64(0, 1) == [0, 0]
; run: %umulof_i64(1, 1) == [1, 0]
; run: %umulof_i64(0xFFFFFFFF_FFFFFFFF, 2) == [0xFFFFFFFF_FFFFFFFE, 1]
; run: %umulof_i64(1, -1) == [-1, 0]
; run: %umulof_i64(2, 2) == [4, 0]
; run: %umulof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [1, 1]
; run: %umulof_i64(0x80000000_00000000, 0x7FFFFFFF_FFFFFFFF) == [0x80000000_00000000, 1]
; run: %umulof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x2236D88F_E5618CF0, 1]
; run: %umulof_i64(0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF) == [0xDB6B1E48_19BA1112, 1]
function %umulof_i32(i32, i32) -> i32, i8 {
block0(v0: i32, v1: i32):
v2, v3 = umul_overflow v0, v1
return v2, v3
}
; run: %umulof_i32(0, 1) == [0, 0]
; run: %umulof_i32(1, 1) == [1, 0]
; run: %umulof_i32(0xFFFFFFFF, 2) == [0xFFFFFFFE, 1]
; run: %umulof_i32(1, -1) == [-1, 0]
; run: %umulof_i32(2, 2) == [4, 0]
; run: %umulof_i32(0x7FFFFFFF, 0x7FFFFFFF) == [1, 1]
; run: %umulof_i32(0x80000000, 0x7FFFFFFF) == [0x80000000, 1]
; run: %umulof_i32(0x01234567, 0xFEDCBA98) == [0x23E20B28, 1]
; run: %umulof_i32(0xC0FFEEEE, 0xDECAFFFF) == [0x19BA1112, 1]
function %umulof_i16(i16, i16) -> i16, i8 {
block0(v0: i16, v1: i16):
v2, v3 = umul_overflow v0, v1
return v2, v3
}
; run: %umulof_i16(0, 1) == [0, 0]
; run: %umulof_i16(1, 1) == [1, 0]
; run: %umulof_i16(0xFFFF, 2) == [0xFFFE, 1]
; run: %umulof_i16(1, -1) == [-1, 0]
; run: %umulof_i16(2, 2) == [4, 0]
; run: %umulof_i16(0x7FFF, 0x7FFF) == [1, 1]
; run: %umulof_i16(0x8000, 0x7FFF) == [0x8000, 1]
; run: %umulof_i16(0x0123, 0xFEDC) == [0xB414, 1]
; run: %umulof_i16(0xC0FF, 0xDECA) == [0x6B36, 1]
function %umulof_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
v2, v3 = umul_overflow v0, v1
return v2, v3
}
; run: %umulof_i8(0, 1) == [0, 0]
; run: %umulof_i8(1, 1) == [1, 0]
; run: %umulof_i8(0xFF, 2) == [0xFE, 1]
; run: %umulof_i8(1, -1) == [-1, 0]
; run: %umulof_i8(2, 2) == [4, 0]
; run: %umulof_i8(0x7F, 0x7F) == [1, 1]
; run: %umulof_i8(0x80, 0x7F) == [0x80, 1]
; run: %umulof_i8(0x01, 0xFE) == [0xFE, 0]
; run: %umulof_i8(0xC0, 0xDE) == [0x80, 1]
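For the unsigned multiplies, the flag is equivalent to checking the high half of a widening multiply, which is also how Rust's `overflowing_mul` behaves; a small cross-check of the i8 cases above:

```rust
fn main() {
    // %umulof_i8(0xFF, 2) == [0xFE, 1]
    assert_eq!(0xFFu8.overflowing_mul(2), (0xFE, true));
    // %umulof_i8(0xC0, 0xDE) == [0x80, 1]
    assert_eq!(0xC0u8.overflowing_mul(0xDE), (0x80, true));
    // Same check via a widening multiply: the low byte is the wrapped result
    // and a non-zero high byte means the multiplication overflowed.
    let wide = 0xC0u16 * 0xDEu16;
    assert_eq!((wide as u8, (wide >> 8) != 0), (0x80, true));
}
```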


@@ -0,0 +1,74 @@
test interpret
test run
set enable_llvm_abi_extensions=true
target x86_64
target aarch64
function %usubof_i128(i128, i128) -> i128, i8 {
block0(v0: i128,v1: i128):
v2, v3 = usub_overflow v0, v1
return v2, v3
}
; run: %usubof_i128(0, 0) == [0, 0]
; run: %usubof_i128(0, 1) == [-1, 1]
; run: %usubof_i128(-1, 0) == [-1, 0]
; run: %usubof_i128(-1, 1) == [-2, 0]
; run: %usubof_i128(-1, -2) == [1, 0]
; run: %usubof_i128(0x00000000_00000001_00000000_00000000, 1) == [0xFFFFFFFF_FFFFFFFF, 0]
; run: %usubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == [0, 0]
; run: %usubof_i128(0x80000000_00000000_00000000_00000000, 1) == [0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0]
; run: %usubof_i128(0x80000000_00000000_00000000_00000001, 1) == [0x80000000_00000000_00000000_00000000, 0]
; run: %usubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [-1, 1]
; run: %usubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [-2, 1]
function %usubof_i64(i64, i64) -> i64, i8 {
block0(v0: i64,v1: i64):
v2, v3 = usub_overflow v0, v1
return v2, v3
}
; run: %usubof_i64(0, 0) == [0, 0]
; run: %usubof_i64(0, 1) == [-1, 1]
; run: %usubof_i64(-1, 0) == [-1, 0]
; run: %usubof_i64(-1, 1) == [-2, 0]
; run: %usubof_i64(-1, -2) == [1, 0]
; run: %usubof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [0, 0]
; run: %usubof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000000) == [-1, 1]
; run: %usubof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000001) == [-2, 1]
; run: %usubof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x0246_8ACF_1357_9BDF, 1]
; run: %usubof_i64(0xFEDCBA98_76543220, 0x01234567_89ABCDEF) == [0xFDB9_7530_ECA8_6431, 0]
function %usubof_i8(i8, i8) -> i8, i8 {
block0(v0: i8, v1: i8):
v2, v3 = usub_overflow v0, v1
return v2, v3
}
; run: %usubof_i8(0, 1) == [-1, 1]
; run: %usubof_i8(100, 20) == [80, 0]
; run: %usubof_i8(100, -20) == [120, 1]
; run: %usubof_i8(127, -128) == [-1, 1]
; run: %usubof_i8(0x80, 0x80) == [0, 0]
; run: %usubof_i8(0xFF, 0xFF) == [0, 0]
; run: %usubof_i8(0xFE, 0xFF) == [0xFF, 1]
; run: %usubof_i8(0x80, 1) == [0x7F, 0]
function %usubof_i16(i16, i16) -> i16, i8 {
block0(v0: i16, v1: i16):
v2, v3 = usub_overflow v0, v1
return v2, v3
}
; run: %usubof_i16(0, 1) == [-1, 1]
; run: %usubof_i16(100, 20) == [80, 0]
; run: %usubof_i16(0xFFFF, 0xFFFF) == [0, 0]
; run: %usubof_i16(0xFFFE, 0xFFFF) == [-1, 1]
; run: %usubof_i16(0xFFFE, 0xFE) == [0xFF00, 0]
function %usubof_i32(i32, i32) -> i32, i8 {
block0(v0: i32, v1: i32):
v2, v3 = usub_overflow v0, v1
return v2, v3
}
; run: %usubof_i32(0, 1) == [-1, 1]
; run: %usubof_i32(100, 20) == [80, 0]
; run: %usubof_i32(0xFFFF_FFFF, 0xFFFF_FFFF) == [0, 0]
; run: %usubof_i32(0xFFFF_FFFE, 0xFFFF_FFFF) == [-1, 1]
; run: %usubof_i32(0xFFFF_FFFE, 0xFE) == [0xFFFF_FF00, 0]
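The unsigned-subtract flag is simply the borrow out of the subtraction; Rust's `overflowing_sub` on unsigned integers reproduces the i8 expectations above directly:

```rust
fn main() {
    // %usubof_i8(0, 1) == [-1, 1]
    assert_eq!(0u8.overflowing_sub(1), (0xFF, true));
    // %usubof_i8(100, -20) == [120, 1]  (-20 as u8 is 236)
    assert_eq!(100u8.overflowing_sub(236), (120, true));
    // %usubof_i8(0x80, 1) == [0x7F, 0]
    assert_eq!(0x80u8.overflowing_sub(1), (0x7F, false));
}
```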


@@ -466,6 +466,7 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
args,
rets,
(Opcode::IaddCout, &([I8, I8] | [I16, I16] | [I128, I128])),
(Opcode::UmulOverflow | Opcode::SmulOverflow, &[I128, I128]),
(Opcode::Imul, &[I8X16, I8X16]),
// https://github.com/bytecodealliance/wasmtime/issues/5468
(Opcode::Smulhi | Opcode::Umulhi, &[I8, I8]),
@@ -583,6 +584,7 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
args,
rets,
(Opcode::IaddCout, &[I128, I128]),
(Opcode::UmulOverflow | Opcode::SmulOverflow, &[I128, I128]),
// https://github.com/bytecodealliance/wasmtime/issues/4864
(Opcode::Udiv | Opcode::Sdiv, &[I128, I128]),
// https://github.com/bytecodealliance/wasmtime/issues/5472
@@ -638,6 +640,9 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
args,
rets,
(Opcode::IaddCout),
(Opcode::UaddOverflow | Opcode::SaddOverflow),
(Opcode::UsubOverflow | Opcode::SsubOverflow),
(Opcode::UmulOverflow | Opcode::SmulOverflow),
(
Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem,
&[I128, I128]
@@ -682,6 +687,9 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) -
rets,
// TODO
(Opcode::IaddCout),
(Opcode::UaddOverflow | Opcode::SaddOverflow),
(Opcode::UsubOverflow | Opcode::SsubOverflow),
(Opcode::UmulOverflow | Opcode::SmulOverflow),
// TODO
(
Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem,


@@ -752,6 +752,48 @@ where
Opcode::UremImm => binary_unsigned_can_trap(DataValueExt::rem, arg(0), imm_as_ctrl_ty()?)?,
Opcode::SremImm => binary_can_trap(DataValueExt::rem, arg(0), imm_as_ctrl_ty()?)?,
Opcode::IrsubImm => binary(DataValueExt::sub, imm_as_ctrl_ty()?, arg(0))?,
Opcode::UaddOverflow => {
// Reinterpret the operands as unsigned so `overflowing_add` reports the
// carry rather than signed overflow, then convert the sum back.
let lhs = arg(0).convert(ValueConversionKind::ToUnsigned)?;
let rhs = arg(1).convert(ValueConversionKind::ToUnsigned)?;
let (mut sum, carry) = lhs.overflowing_add(rhs)?;
sum = sum.convert(ValueConversionKind::ToSigned)?;
assign_multiple(&[sum, DataValueExt::bool(carry, false, types::I8)?])
}
Opcode::SaddOverflow => {
let lhs = arg(0).convert(ValueConversionKind::ToSigned)?;
let rhs = arg(1).convert(ValueConversionKind::ToSigned)?;
let (sum, overflow) = lhs.overflowing_add(rhs)?;
assign_multiple(&[sum, DataValueExt::bool(overflow, false, types::I8)?])
}
Opcode::UsubOverflow => {
let lhs = arg(0).convert(ValueConversionKind::ToUnsigned)?;
let rhs = arg(1).convert(ValueConversionKind::ToUnsigned)?;
let (mut diff, borrow) = lhs.overflowing_sub(rhs)?;
diff = diff.convert(ValueConversionKind::ToSigned)?;
assign_multiple(&[diff, DataValueExt::bool(borrow, false, types::I8)?])
}
Opcode::SsubOverflow => {
let lhs = arg(0).convert(ValueConversionKind::ToSigned)?;
let rhs = arg(1).convert(ValueConversionKind::ToSigned)?;
let (diff, overflow) = lhs.overflowing_sub(rhs)?;
assign_multiple(&[diff, DataValueExt::bool(overflow, false, types::I8)?])
}
Opcode::UmulOverflow => {
let lhs = arg(0).convert(ValueConversionKind::ToUnsigned)?;
let rhs = arg(1).convert(ValueConversionKind::ToUnsigned)?;
let (mut product, overflow) = lhs.overflowing_mul(rhs)?;
product = product.convert(ValueConversionKind::ToSigned)?;
assign_multiple(&[product, DataValueExt::bool(overflow, false, types::I8)?])
}
Opcode::SmulOverflow => {
let lhs = arg(0).convert(ValueConversionKind::ToSigned)?;
let rhs = arg(1).convert(ValueConversionKind::ToSigned)?;
let (product, overflow) = lhs.overflowing_mul(rhs)?;
assign_multiple(&[product, DataValueExt::bool(overflow, false, types::I8)?])
}
Opcode::IaddCin => choose(
DataValueExt::into_bool(arg(2))?,
DataValueExt::add(
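The reason the unsigned arms above reinterpret their operands before calling `overflowing_*` is that the same bit pattern reports different flags depending on signedness; a minimal standard-library illustration of that difference, matching the i8 filetests:

```rust
fn main() {
    // 0x64 + 0xEC is 100 + (-20) signed, but 100 + 236 unsigned.
    let (x, y) = (0x64u8, 0xECu8);
    // sadd_overflow-style: no signed overflow.
    assert_eq!((x as i8).overflowing_add(y as i8), (80, false));
    // uadd_overflow-style: the addition carries out of 8 bits.
    assert_eq!(x.overflowing_add(y), (80, true));
}
```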


@@ -44,6 +44,9 @@ pub trait DataValueExt: Sized {
fn fma(self, a: Self, b: Self) -> ValueResult<Self>;
fn abs(self) -> ValueResult<Self>;
fn checked_add(self, other: Self) -> ValueResult<Option<Self>>;
fn overflowing_add(self, other: Self) -> ValueResult<(Self, bool)>;
fn overflowing_sub(self, other: Self) -> ValueResult<(Self, bool)>;
fn overflowing_mul(self, other: Self) -> ValueResult<(Self, bool)>;
// Float operations
fn neg(self) -> ValueResult<Self>;
@@ -181,6 +184,15 @@ macro_rules! binary_match {
_ => unimplemented!()
}
};
( pair $op:ident($arg1:expr, $arg2:expr); [ $( $data_value_ty:ident ),* ] ) => {
match ($arg1, $arg2) {
$( (DataValue::$data_value_ty(a), DataValue::$data_value_ty(b)) => {
// Forward to the primitive's `overflowing_*` method, returning the
// wrapped value alongside the overflow flag.
let (wrapped, overflowed) = a.$op(*b);
Ok((DataValue::$data_value_ty(wrapped), overflowed))
} )*
_ => unimplemented!()
}
};
( $op:tt($arg1:expr, $arg2:expr); [ $( $data_value_ty:ident ),* ] ) => {
match ($arg1, $arg2) {
$( (DataValue::$data_value_ty(a), DataValue::$data_value_ty(b)) => { Ok(DataValue::$data_value_ty(a $op b)) } )*
@@ -439,6 +451,11 @@ impl DataValueExt for DataValue {
DataValue::I32(n) => DataValue::U32(n as u32),
DataValue::I64(n) => DataValue::U64(n as u64),
DataValue::I128(n) => DataValue::U128(n as u128),
DataValue::U8(_) => self,
DataValue::U16(_) => self,
DataValue::U32(_) => self,
DataValue::U64(_) => self,
DataValue::U128(_) => self,
_ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind),
},
ValueConversionKind::ToSigned => match self {
@@ -447,6 +464,11 @@ impl DataValueExt for DataValue {
DataValue::U32(n) => DataValue::I32(n as i32),
DataValue::U64(n) => DataValue::I64(n as i64),
DataValue::U128(n) => DataValue::I128(n as i128),
DataValue::I8(_) => self,
DataValue::I16(_) => self,
DataValue::I32(_) => self,
DataValue::I64(_) => self,
DataValue::I128(_) => self,
_ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind),
},
ValueConversionKind::RoundNearestEven(ty) => match (self, ty) {
@@ -615,6 +637,18 @@ impl DataValueExt for DataValue {
binary_match!(option checked_add(&self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
}
fn overflowing_add(self, other: Self) -> ValueResult<(Self, bool)> {
binary_match!(pair overflowing_add(&self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
}
fn overflowing_sub(self, other: Self) -> ValueResult<(Self, bool)> {
binary_match!(pair overflowing_sub(&self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
}
fn overflowing_mul(self, other: Self) -> ValueResult<(Self, bool)> {
binary_match!(pair overflowing_mul(&self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128])
}
fn neg(self) -> ValueResult<Self> {
unary_match!(neg(&self); [F32, F64])
}