Merge pull request #2928 from afonso360/aarch64-i128-ops
Implement iadd,isub,imul for i128 in AArch64
This commit is contained in:
@@ -597,8 +597,12 @@ impl MachInstEmit for Inst {
|
|||||||
let top11 = match alu_op {
|
let top11 = match alu_op {
|
||||||
ALUOp::Add32 => 0b00001011_000,
|
ALUOp::Add32 => 0b00001011_000,
|
||||||
ALUOp::Add64 => 0b10001011_000,
|
ALUOp::Add64 => 0b10001011_000,
|
||||||
|
ALUOp::Adc32 => 0b00011010_000,
|
||||||
|
ALUOp::Adc64 => 0b10011010_000,
|
||||||
ALUOp::Sub32 => 0b01001011_000,
|
ALUOp::Sub32 => 0b01001011_000,
|
||||||
ALUOp::Sub64 => 0b11001011_000,
|
ALUOp::Sub64 => 0b11001011_000,
|
||||||
|
ALUOp::Sbc32 => 0b01011010_000,
|
||||||
|
ALUOp::Sbc64 => 0b11011010_000,
|
||||||
ALUOp::Orr32 => 0b00101010_000,
|
ALUOp::Orr32 => 0b00101010_000,
|
||||||
ALUOp::Orr64 => 0b10101010_000,
|
ALUOp::Orr64 => 0b10101010_000,
|
||||||
ALUOp::And32 => 0b00001010_000,
|
ALUOp::And32 => 0b00001010_000,
|
||||||
|
|||||||
@@ -50,6 +50,26 @@ fn test_aarch64_binemit() {
|
|||||||
"A400068B",
|
"A400068B",
|
||||||
"add x4, x5, x6",
|
"add x4, x5, x6",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AluRRR {
|
||||||
|
alu_op: ALUOp::Adc32,
|
||||||
|
rd: writable_xreg(1),
|
||||||
|
rn: xreg(2),
|
||||||
|
rm: xreg(3),
|
||||||
|
},
|
||||||
|
"4100031A",
|
||||||
|
"adc w1, w2, w3",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AluRRR {
|
||||||
|
alu_op: ALUOp::Adc64,
|
||||||
|
rd: writable_xreg(4),
|
||||||
|
rn: xreg(5),
|
||||||
|
rm: xreg(6),
|
||||||
|
},
|
||||||
|
"A400069A",
|
||||||
|
"adc x4, x5, x6",
|
||||||
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AluRRR {
|
Inst::AluRRR {
|
||||||
alu_op: ALUOp::Sub32,
|
alu_op: ALUOp::Sub32,
|
||||||
@@ -70,6 +90,27 @@ fn test_aarch64_binemit() {
|
|||||||
"A40006CB",
|
"A40006CB",
|
||||||
"sub x4, x5, x6",
|
"sub x4, x5, x6",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AluRRR {
|
||||||
|
alu_op: ALUOp::Sbc32,
|
||||||
|
rd: writable_xreg(1),
|
||||||
|
rn: xreg(2),
|
||||||
|
rm: xreg(3),
|
||||||
|
},
|
||||||
|
"4100035A",
|
||||||
|
"sbc w1, w2, w3",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::AluRRR {
|
||||||
|
alu_op: ALUOp::Sbc64,
|
||||||
|
rd: writable_xreg(4),
|
||||||
|
rn: xreg(5),
|
||||||
|
rm: xreg(6),
|
||||||
|
},
|
||||||
|
"A40006DA",
|
||||||
|
"sbc x4, x5, x6",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AluRRR {
|
Inst::AluRRR {
|
||||||
alu_op: ALUOp::Orr32,
|
alu_op: ALUOp::Orr32,
|
||||||
|
|||||||
@@ -84,6 +84,12 @@ pub enum ALUOp {
|
|||||||
Asr64,
|
Asr64,
|
||||||
Lsl32,
|
Lsl32,
|
||||||
Lsl64,
|
Lsl64,
|
||||||
|
/// Add with carry
|
||||||
|
Adc32,
|
||||||
|
Adc64,
|
||||||
|
/// Subtract with carry
|
||||||
|
Sbc32,
|
||||||
|
Sbc64,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An ALU operation with three arguments.
|
/// An ALU operation with three arguments.
|
||||||
@@ -1365,6 +1371,23 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Create instructions that load a 128-bit constant.
///
/// The value is split into two 64-bit halves. The low 64 bits are loaded into
/// `to_regs.regs()[0]` and the high 64 bits into `to_regs.regs()[1]`, each via
/// `Inst::load_constant`. The returned sequence holds the low-half
/// instructions followed by the high-half instructions.
///
/// # Panics
///
/// Panics if `to_regs` does not hold exactly two registers.
|
||||||
|
pub fn load_constant128(to_regs: ValueRegs<Writable<Reg>>, value: u128) -> SmallVec<[Inst; 4]> {
|
||||||
|
assert_eq!(to_regs.len(), 2, "Expected to load i128 into two registers");
|
||||||
|
|
||||||
|
// `as u64` on a `u128` truncates, keeping only the low 64 bits.
let lower = value as u64;
|
||||||
|
// Shift the high half down first so the truncating cast keeps it.
let upper = (value >> 64) as u64;
|
||||||
|
|
||||||
|
let lower_reg = to_regs.regs()[0];
|
||||||
|
let upper_reg = to_regs.regs()[1];
|
||||||
|
|
||||||
|
let mut load_ins = Inst::load_constant(lower_reg, lower);
|
||||||
|
let load_upper = Inst::load_constant(upper_reg, upper);
|
||||||
|
|
||||||
|
// Append the high-half sequence after the low-half one.
load_ins.extend(load_upper.into_iter());
|
||||||
|
load_ins
|
||||||
|
}
|
||||||
|
|
||||||
/// Create instructions that load a 32-bit floating-point constant.
|
/// Create instructions that load a 32-bit floating-point constant.
|
||||||
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
|
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
|
||||||
rd: Writable<Reg>,
|
rd: Writable<Reg>,
|
||||||
@@ -3033,30 +3056,15 @@ impl MachInst for Inst {
|
|||||||
ty: Type,
|
ty: Type,
|
||||||
alloc_tmp: F,
|
alloc_tmp: F,
|
||||||
) -> SmallVec<[Inst; 4]> {
|
) -> SmallVec<[Inst; 4]> {
|
||||||
let to_reg = to_regs
|
let to_reg = to_regs.only_reg();
|
||||||
.only_reg()
|
match ty {
|
||||||
.expect("multi-reg values not supported yet");
|
F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
|
||||||
let value = value as u64;
|
F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
|
||||||
if ty == F64 {
|
B1 | B8 | B16 | B32 | B64 | I8 | I16 | I32 | I64 | R32 | R64 => {
|
||||||
Inst::load_fp_constant64(to_reg, value, alloc_tmp)
|
Inst::load_constant(to_reg.unwrap(), value as u64)
|
||||||
} else if ty == F32 {
|
}
|
||||||
Inst::load_fp_constant32(to_reg, value as u32, alloc_tmp)
|
I128 => Inst::load_constant128(to_regs, value),
|
||||||
} else {
|
_ => panic!("Cannot generate constant for type: {}", ty),
|
||||||
// Must be an integer type.
|
|
||||||
debug_assert!(
|
|
||||||
ty == B1
|
|
||||||
|| ty == I8
|
|
||||||
|| ty == B8
|
|
||||||
|| ty == I16
|
|
||||||
|| ty == B16
|
|
||||||
|| ty == I32
|
|
||||||
|| ty == B32
|
|
||||||
|| ty == I64
|
|
||||||
|| ty == B64
|
|
||||||
|| ty == R32
|
|
||||||
|| ty == R64
|
|
||||||
);
|
|
||||||
Inst::load_constant(to_reg, value)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3202,6 +3210,10 @@ impl Inst {
|
|||||||
ALUOp::Asr64 => ("asr", OperandSize::Size64),
|
ALUOp::Asr64 => ("asr", OperandSize::Size64),
|
||||||
ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
|
ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
|
||||||
ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
|
ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
|
||||||
|
ALUOp::Adc32 => ("adc", OperandSize::Size32),
|
||||||
|
ALUOp::Adc64 => ("adc", OperandSize::Size64),
|
||||||
|
ALUOp::Sbc32 => ("sbc", OperandSize::Size32),
|
||||||
|
ALUOp::Sbc64 => ("sbc", OperandSize::Size64),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -64,36 +64,118 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
lower_constant_f64(ctx, rd, value);
|
lower_constant_f64(ctx, rd, value);
|
||||||
}
|
}
|
||||||
Opcode::Iadd => {
|
Opcode::Iadd => {
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
match ty.unwrap() {
|
||||||
let ty = ty.unwrap();
|
ty if ty.is_vector() => {
|
||||||
if !ty.is_vector() {
|
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
let mul_insn =
|
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
if let Some(mul_insn) = maybe_input_insn(ctx, inputs[1], Opcode::Imul) {
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
ctx.emit(Inst::VecRRR {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
alu_op: VecALUOp::Add,
|
||||||
|
size: VectorSize::from_ty(ty),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
I128 => {
|
||||||
|
let lhs = put_input_in_regs(ctx, inputs[0]);
|
||||||
|
let rhs = put_input_in_regs(ctx, inputs[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
assert_eq!(lhs.len(), 2);
|
||||||
|
assert_eq!(rhs.len(), 2);
|
||||||
|
assert_eq!(dst.len(), 2);
|
||||||
|
|
||||||
|
// adds x0, x0, x2
|
||||||
|
// adc x1, x1, x3
|
||||||
|
|
||||||
|
ctx.emit(Inst::AluRRR {
|
||||||
|
alu_op: ALUOp::AddS64,
|
||||||
|
rd: dst.regs()[0],
|
||||||
|
rn: lhs.regs()[0],
|
||||||
|
rm: rhs.regs()[0],
|
||||||
|
});
|
||||||
|
ctx.emit(Inst::AluRRR {
|
||||||
|
alu_op: ALUOp::Adc64,
|
||||||
|
rd: dst.regs()[1],
|
||||||
|
rn: lhs.regs()[1],
|
||||||
|
rm: rhs.regs()[1],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
ty => {
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
let mul_insn = if let Some(mul_insn) =
|
||||||
|
maybe_input_insn(ctx, inputs[1], Opcode::Imul)
|
||||||
|
{
|
||||||
Some((mul_insn, 0))
|
Some((mul_insn, 0))
|
||||||
} else if let Some(mul_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Imul) {
|
} else if let Some(mul_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Imul) {
|
||||||
Some((mul_insn, 1))
|
Some((mul_insn, 1))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
// If possible combine mul + add into madd.
|
// If possible combine mul + add into madd.
|
||||||
if let Some((insn, addend_idx)) = mul_insn {
|
if let Some((insn, addend_idx)) = mul_insn {
|
||||||
let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
|
let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
|
||||||
let rn_input = InsnInput { insn, input: 0 };
|
let rn_input = InsnInput { insn, input: 0 };
|
||||||
let rm_input = InsnInput { insn, input: 1 };
|
let rm_input = InsnInput { insn, input: 1 };
|
||||||
|
|
||||||
let rn = put_input_in_reg(ctx, rn_input, NarrowValueMode::None);
|
let rn = put_input_in_reg(ctx, rn_input, NarrowValueMode::None);
|
||||||
let rm = put_input_in_reg(ctx, rm_input, NarrowValueMode::None);
|
let rm = put_input_in_reg(ctx, rm_input, NarrowValueMode::None);
|
||||||
let ra = put_input_in_reg(ctx, inputs[addend_idx], NarrowValueMode::None);
|
let ra = put_input_in_reg(ctx, inputs[addend_idx], NarrowValueMode::None);
|
||||||
|
|
||||||
ctx.emit(Inst::AluRRRR {
|
ctx.emit(Inst::AluRRRR {
|
||||||
alu_op,
|
alu_op,
|
||||||
rd,
|
rd,
|
||||||
rn,
|
rn,
|
||||||
rm,
|
rm,
|
||||||
ra,
|
ra,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
|
||||||
|
ctx,
|
||||||
|
inputs[1],
|
||||||
|
ty_bits(ty),
|
||||||
|
NarrowValueMode::None,
|
||||||
|
);
|
||||||
|
let alu_op = if !negated {
|
||||||
|
choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
|
||||||
|
} else {
|
||||||
|
choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
|
||||||
|
};
|
||||||
|
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Opcode::Isub => {
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
if ty == I128 {
|
||||||
|
let lhs = put_input_in_regs(ctx, inputs[0]);
|
||||||
|
let rhs = put_input_in_regs(ctx, inputs[1]);
|
||||||
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
assert_eq!(lhs.len(), 2);
|
||||||
|
assert_eq!(rhs.len(), 2);
|
||||||
|
assert_eq!(dst.len(), 2);
|
||||||
|
|
||||||
|
// subs x0, x0, x2
|
||||||
|
// sbc x1, x1, x3
|
||||||
|
|
||||||
|
ctx.emit(Inst::AluRRR {
|
||||||
|
alu_op: ALUOp::SubS64,
|
||||||
|
rd: dst.regs()[0],
|
||||||
|
rn: lhs.regs()[0],
|
||||||
|
rm: rhs.regs()[0],
|
||||||
|
});
|
||||||
|
ctx.emit(Inst::AluRRR {
|
||||||
|
alu_op: ALUOp::Sbc64,
|
||||||
|
rd: dst.regs()[1],
|
||||||
|
rn: lhs.regs()[1],
|
||||||
|
rm: rhs.regs()[1],
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
if !ty.is_vector() {
|
||||||
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
|
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
|
||||||
ctx,
|
ctx,
|
||||||
inputs[1],
|
inputs[1],
|
||||||
@@ -101,50 +183,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
NarrowValueMode::None,
|
NarrowValueMode::None,
|
||||||
);
|
);
|
||||||
let alu_op = if !negated {
|
let alu_op = if !negated {
|
||||||
choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
|
|
||||||
} else {
|
|
||||||
choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
|
choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
|
||||||
|
} else {
|
||||||
|
choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
|
||||||
};
|
};
|
||||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
ctx.emit(Inst::VecRRR {
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
alu_op: VecALUOp::Add,
|
|
||||||
size: VectorSize::from_ty(ty),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Opcode::Isub => {
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
|
||||||
let ty = ty.unwrap();
|
|
||||||
if !ty.is_vector() {
|
|
||||||
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
|
|
||||||
ctx,
|
|
||||||
inputs[1],
|
|
||||||
ty_bits(ty),
|
|
||||||
NarrowValueMode::None,
|
|
||||||
);
|
|
||||||
let alu_op = if !negated {
|
|
||||||
choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
|
|
||||||
} else {
|
} else {
|
||||||
choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
|
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
};
|
ctx.emit(Inst::VecRRR {
|
||||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
rd,
|
||||||
} else {
|
rn,
|
||||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
rm,
|
||||||
ctx.emit(Inst::VecRRR {
|
alu_op: VecALUOp::Sub,
|
||||||
rd,
|
size: VectorSize::from_ty(ty),
|
||||||
rn,
|
});
|
||||||
rm,
|
}
|
||||||
alu_op: VecALUOp::Sub,
|
|
||||||
size: VectorSize::from_ty(ty),
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
|
Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
|
||||||
@@ -191,21 +244,70 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Imul => {
|
Opcode::Imul => {
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let lhs = put_input_in_regs(ctx, inputs[0]);
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
let rhs = put_input_in_regs(ctx, inputs[1]);
|
||||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
|
|
||||||
|
let rd = dst.regs()[0];
|
||||||
|
let rn = lhs.regs()[0];
|
||||||
|
let rm = rhs.regs()[0];
|
||||||
|
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
if !ty.is_vector() {
|
match ty {
|
||||||
let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
|
I128 => {
|
||||||
ctx.emit(Inst::AluRRRR {
|
assert_eq!(lhs.len(), 2);
|
||||||
alu_op,
|
assert_eq!(rhs.len(), 2);
|
||||||
rd,
|
assert_eq!(dst.len(), 2);
|
||||||
rn,
|
|
||||||
rm,
|
// 128bit mul formula:
|
||||||
ra: zero_reg(),
|
// dst_lo = lhs_lo * rhs_lo
|
||||||
});
|
// dst_hi = umulhi(lhs_lo, rhs_lo) + (lhs_lo * rhs_hi) + (lhs_hi * rhs_lo)
|
||||||
} else {
|
//
|
||||||
if ty == I64X2 {
|
// We can convert the above formula into the following
|
||||||
|
// umulh dst_hi, lhs_lo, rhs_lo
|
||||||
|
// madd dst_hi, lhs_lo, rhs_hi, dst_hi
|
||||||
|
// madd dst_hi, lhs_hi, rhs_lo, dst_hi
|
||||||
|
// mul dst_lo, lhs_lo, rhs_lo
|
||||||
|
|
||||||
|
ctx.emit(Inst::AluRRR {
|
||||||
|
alu_op: ALUOp::UMulH,
|
||||||
|
rd: dst.regs()[1],
|
||||||
|
rn: lhs.regs()[0],
|
||||||
|
rm: rhs.regs()[0],
|
||||||
|
});
|
||||||
|
ctx.emit(Inst::AluRRRR {
|
||||||
|
alu_op: ALUOp3::MAdd64,
|
||||||
|
rd: dst.regs()[1],
|
||||||
|
rn: lhs.regs()[0],
|
||||||
|
rm: rhs.regs()[1],
|
||||||
|
ra: dst.regs()[1].to_reg(),
|
||||||
|
});
|
||||||
|
ctx.emit(Inst::AluRRRR {
|
||||||
|
alu_op: ALUOp3::MAdd64,
|
||||||
|
rd: dst.regs()[1],
|
||||||
|
rn: lhs.regs()[1],
|
||||||
|
rm: rhs.regs()[0],
|
||||||
|
ra: dst.regs()[1].to_reg(),
|
||||||
|
});
|
||||||
|
ctx.emit(Inst::AluRRRR {
|
||||||
|
alu_op: ALUOp3::MAdd64,
|
||||||
|
rd: dst.regs()[0],
|
||||||
|
rn: lhs.regs()[0],
|
||||||
|
rm: rhs.regs()[0],
|
||||||
|
ra: zero_reg(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
ty if !ty.is_vector() => {
|
||||||
|
let alu_op = choose_32_64(ty, ALUOp3::MAdd32, ALUOp3::MAdd64);
|
||||||
|
ctx.emit(Inst::AluRRRR {
|
||||||
|
alu_op,
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
ra: zero_reg(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
I64X2 => {
|
||||||
let tmp1 = ctx.alloc_tmp(I64X2).only_reg().unwrap();
|
let tmp1 = ctx.alloc_tmp(I64X2).only_reg().unwrap();
|
||||||
let tmp2 = ctx.alloc_tmp(I64X2).only_reg().unwrap();
|
let tmp2 = ctx.alloc_tmp(I64X2).only_reg().unwrap();
|
||||||
|
|
||||||
@@ -310,7 +412,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
rm: tmp1.to_reg(),
|
rm: tmp1.to_reg(),
|
||||||
size: VectorSize::Size32x2,
|
size: VectorSize::Size32x2,
|
||||||
});
|
});
|
||||||
} else {
|
}
|
||||||
|
ty if ty.is_vector() => {
|
||||||
ctx.emit(Inst::VecRRR {
|
ctx.emit(Inst::VecRRR {
|
||||||
alu_op: VecALUOp::Mul,
|
alu_op: VecALUOp::Mul,
|
||||||
rd,
|
rd,
|
||||||
@@ -319,6 +422,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
size: VectorSize::from_ty(ty),
|
size: VectorSize::from_ty(ty),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
_ => panic!("Unable to emit mul for {}", ty),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,78 @@
|
|||||||
|
test run
|
||||||
|
target aarch64
|
||||||
|
|
||||||
|
; i128 tests
|
||||||
|
; TODO: Clean up these tests when we have native support for i128 immediates in CLIF's parser
|
||||||
|
; Materializes the i128 constant 0 and returns it as two i64 values,
; because the function signature only uses i64 results.
function %i128_const_0() -> i64, i64 {
|
||||||
|
block0:
|
||||||
|
v1 = iconst.i128 0
|
||||||
|
; Split the 128-bit value into its two 64-bit halves for returning.
v2, v3 = isplit v1
|
||||||
|
return v2, v3
|
||||||
|
}
|
||||||
|
; run: %i128_const_0() == [0, 0]
|
||||||
|
|
||||||
|
; 128-bit addition driven through i64 arguments and results.
; Each operand is passed as a (low, high) pair: the first iconcat operand and
; the first isplit result act as the low 64 bits, as the carry-propagation
; case among the run directives after this function shows.
function %add_i128(i64, i64, i64, i64) -> i64, i64 {
|
||||||
|
block0(v0: i64,v1: i64,v2: i64,v3: i64):
|
||||||
|
; Build the two i128 operands from their halves.
v4 = iconcat v0, v1
|
||||||
|
v5 = iconcat v2, v3
|
||||||
|
|
||||||
|
v6 = iadd v4, v5
|
||||||
|
|
||||||
|
; Split the i128 sum back into two i64 values for returning.
v7, v8 = isplit v6
|
||||||
|
return v7, v8
|
||||||
|
}
|
||||||
|
; run: %add_i128(0, 0, 0, 0) == [0, 0]
|
||||||
|
; run: %add_i128(0, -1, -1, 0) == [-1, -1]
|
||||||
|
; run: %add_i128(1, 0, 0, 0) == [1, 0]
|
||||||
|
; run: %add_i128(1, 0, 1, 0) == [2, 0]
|
||||||
|
; run: %add_i128(1, 0, -1, -1) == [0, 0]
|
||||||
|
; run: %add_i128(-1, 0, 1, 0) == [0, 1]
|
||||||
|
|
||||||
|
; run: %add_i128(0x01234567_89ABCDEF, 0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210) == [-1, -1]
|
||||||
|
; run: %add_i128(0x06060606_06060606, 0xA00A00A0_0A00A00A, 0x30303030_30303030, 0x0BB0BB0B_B0BB0BB0) == [0x36363636_36363636, 0xABBABBAB_BABBABBA]
|
||||||
|
; run: %add_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF]
|
||||||
|
|
||||||
|
; 128-bit subtraction driven through i64 arguments and results.
; Each operand is passed as a (low, high) pair; the borrow case among the run
; directives after this function (0 - 1 giving all ones in both halves)
; exercises propagation from the low half into the high half.
function %sub_i128(i64, i64, i64, i64) -> i64, i64 {
|
||||||
|
block0(v0: i64,v1: i64,v2: i64,v3: i64):
|
||||||
|
; Build the two i128 operands from their halves.
v4 = iconcat v0, v1
|
||||||
|
v5 = iconcat v2, v3
|
||||||
|
|
||||||
|
v6 = isub v4, v5
|
||||||
|
|
||||||
|
; Split the i128 difference back into two i64 values for returning.
v7, v8 = isplit v6
|
||||||
|
return v7, v8
|
||||||
|
}
|
||||||
|
; run: %sub_i128(0, 0, 0, 0) == [0, 0]
|
||||||
|
; run: %sub_i128(1, 0, 1, 0) == [0, 0]
|
||||||
|
; run: %sub_i128(1, 0, 0, 0) == [1, 0]
|
||||||
|
; run: %sub_i128(0, 0, 1, 0) == [-1, -1]
|
||||||
|
; run: %sub_i128(0, 0, -1, -1) == [1, 0]
|
||||||
|
|
||||||
|
; run: %sub_i128(-1, -1, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210) == [0x01234567_89ABCDEF, 0x01234567_89ABCDEF]
|
||||||
|
; run: %sub_i128(0x36363636_36363636, 0xABBABBAB_BABBABBA, 0x30303030_30303030, 0x0BB0BB0B_B0BB0BB0) == [0x06060606_06060606, 0xA00A00A0_0A00A00A]
|
||||||
|
; run: %sub_i128(0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF, 0x1DCB1111_1DCB1111, 0x1DCB1111_1DCB1111) == [0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE]
|
||||||
|
|
||||||
|
|
||||||
|
; 128-bit multiplication driven through i64 arguments and results.
; Each operand is passed as a (low, high) pair and the product is returned
; the same way; only the low 128 bits of the product are observable here.
function %mul_i128(i64, i64, i64, i64) -> i64, i64 {
|
||||||
|
block0(v0: i64,v1: i64,v2: i64,v3: i64):
|
||||||
|
; Build the two i128 operands from their halves.
v4 = iconcat v0, v1
|
||||||
|
v5 = iconcat v2, v3
|
||||||
|
|
||||||
|
v6 = imul v4, v5
|
||||||
|
|
||||||
|
; Split the i128 product back into two i64 values for returning.
v7, v8 = isplit v6
|
||||||
|
return v7, v8
|
||||||
|
}
|
||||||
|
; run: %mul_i128(0, 0, 0, 0) == [0, 0]
|
||||||
|
; run: %mul_i128(1, 0, 1, 0) == [1, 0]
|
||||||
|
; run: %mul_i128(1, 0, 0, 0) == [0, 0]
|
||||||
|
; run: %mul_i128(0, 0, 1, 0) == [0, 0]
|
||||||
|
; run: %mul_i128(2, 0, 1, 0) == [2, 0]
|
||||||
|
; run: %mul_i128(2, 0, 2, 0) == [4, 0]
|
||||||
|
; run: %mul_i128(1, 0, -1, -1) == [-1, -1]
|
||||||
|
; run: %mul_i128(2, 0, -1, -1) == [-2, -1]
|
||||||
|
|
||||||
|
; run: %mul_i128(0x01010101_01010101, 0x01010101_01010101, 13, 0) == [0x0D0D0D0D_0D0D0D0D, 0x0D0D0D0D_0D0D0D0D]
|
||||||
|
; run: %mul_i128(13, 0, 0x01010101_01010101, 0x01010101_01010101) == [0x0D0D0D0D_0D0D0D0D, 0x0D0D0D0D_0D0D0D0D]
|
||||||
|
; run: %mul_i128(0x00000000_01234567, 0x89ABCDEF_00000000, 0x00000000_FEDCBA98, 0x76543210_00000000) == [0x0121FA00_23E20B28, 0xE2946058_00000000]
|
||||||
|
; run: %mul_i128(0xC0FFEEEE_C0FFEEEE, 0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF, 0xDECAFFFF_DECAFFFF) == [0xDB6B1E48_19BA1112, 0x5ECD38B5_9D1C2B7E]
|
||||||
@@ -425,3 +425,46 @@ block0(v0: i8x16):
|
|||||||
; nextln: ushl v0.16b, v0.16b, v1.16b
|
; nextln: ushl v0.16b, v0.16b, v1.16b
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|
||||||
|
|
||||||
|
; Compile test for i128 iadd. The expected code after this function is a
; flag-setting adds on the first register of each operand followed by an adc
; on the second register of each operand.
function %add_i128(i128, i128) -> i128 {
|
||||||
|
block0(v0: i128, v1: i128):
|
||||||
|
v2 = iadd v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: adds x0, x0, x2
|
||||||
|
; nextln: adc x1, x1, x3
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
; Compile test for i128 isub. The expected code after this function is a
; flag-setting subs on the first register of each operand followed by an sbc
; on the second register of each operand.
function %sub_i128(i128, i128) -> i128 {
|
||||||
|
block0(v0: i128, v1: i128):
|
||||||
|
v2 = isub v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: subs x0, x0, x2
|
||||||
|
; nextln: sbc x1, x1, x3
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
; Compile test for i128 imul. The expected code after this function is one
; umulh plus three madd instructions, the last of which uses xzr as addend.
function %mul_i128(i128, i128) -> i128 {
|
||||||
|
block0(v0: i128, v1: i128):
|
||||||
|
v2 = imul v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
|
; nextln: mov fp, sp
|
||||||
|
; nextln: umulh x4, x0, x2
|
||||||
|
; nextln: madd x3, x0, x3, x4
|
||||||
|
; nextln: madd x1, x1, x2, x3
|
||||||
|
; nextln: madd x0, x0, x2, xzr
|
||||||
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
; nextln: ret
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user