aarch64: Implement iadd for i128 operands

Afonso Bordado
2021-05-14 18:12:13 +01:00
parent b8fd632fb5
commit d3b525fa29
6 changed files with 179 additions and 67 deletions

View File

@@ -597,6 +597,8 @@ impl MachInstEmit for Inst {
let top11 = match alu_op {
ALUOp::Add32 => 0b00001011_000,
ALUOp::Add64 => 0b10001011_000,
ALUOp::Adc32 => 0b00011010_000,
ALUOp::Adc64 => 0b10011010_000,
ALUOp::Sub32 => 0b01001011_000,
ALUOp::Sub64 => 0b11001011_000,
ALUOp::Orr32 => 0b00101010_000,
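
The new Adc32/Adc64 arms only supply the top eleven opcode bits; the register fields are packed in by the shared three-register ALU encoding path. A minimal standalone sketch of that packing (an illustrative helper, not the crate's actual emit code) that reproduces the byte strings asserted in the binemit tests below:

// Illustrative sketch (not the crate's emit helper): how an AArch64
// three-register ALU op such as ADC is assembled from the `top11` bits above.
fn enc_rrr(top11: u32, bits_15_10: u32, rd: u32, rn: u32, rm: u32) -> u32 {
    (top11 << 21) | (rm << 16) | (bits_15_10 << 10) | (rn << 5) | rd
}

fn main() {
    // adc w1, w2, w3 -> 0x1A030041, stored little-endian as "4100031A".
    assert_eq!(enc_rrr(0b00011010_000, 0, 1, 2, 3), 0x1A030041);
    // adc x4, x5, x6 -> 0x9A0600A4, stored little-endian as "A400069A".
    assert_eq!(enc_rrr(0b10011010_000, 0, 4, 5, 6), 0x9A0600A4);
}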

View File

@@ -50,6 +50,26 @@ fn test_aarch64_binemit() {
"A400068B",
"add x4, x5, x6",
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::Adc32,
rd: writable_xreg(1),
rn: xreg(2),
rm: xreg(3),
},
"4100031A",
"adc w1, w2, w3",
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::Adc64,
rd: writable_xreg(4),
rn: xreg(5),
rm: xreg(6),
},
"A400069A",
"adc x4, x5, x6",
));
insns.push((
Inst::AluRRR {
alu_op: ALUOp::Sub32,

View File

@@ -84,6 +84,9 @@ pub enum ALUOp {
Asr64,
Lsl32,
Lsl64,
/// Add with carry
Adc32,
Adc64,
}
/// An ALU operation with three arguments.
@@ -1365,6 +1368,23 @@ impl Inst {
}
}
/// Create instructions that load a 128-bit constant.
pub fn load_constant128(to_regs: ValueRegs<Writable<Reg>>, value: u128) -> SmallVec<[Inst; 4]> {
assert_eq!(to_regs.len(), 2, "Expected to load i128 into two registers");
let lower = value as u64;
let upper = (value >> 64) as u64;
let lower_reg = to_regs.regs()[0];
let upper_reg = to_regs.regs()[1];
let mut load_ins = Inst::load_constant(lower_reg, lower);
let load_upper = Inst::load_constant(upper_reg, upper);
load_ins.extend(load_upper.into_iter());
load_ins
}
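
A quick sketch of the lower/upper split that load_constant128 performs, with a plain (u64, u64) pair standing in for the two destination registers (illustration only): the low 64 bits go to the first register of the pair, the high 64 bits to the second.

// Illustration only: the lower/upper split performed by load_constant128.
// The low 64 bits land in regs()[0], the high 64 bits in regs()[1].
fn split_u128(value: u128) -> (u64, u64) {
    (value as u64, (value >> 64) as u64)
}

fn main() {
    let (lower, upper) = split_u128(0x0000_0000_0000_0001_ffff_ffff_ffff_ffff);
    assert_eq!(lower, 0xffff_ffff_ffff_ffff);
    assert_eq!(upper, 0x0000_0000_0000_0001);
}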
/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
@@ -3033,30 +3053,15 @@ impl MachInst for Inst {
ty: Type,
alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
    let to_reg = to_regs.only_reg();
    match ty {
        F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
        F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
        B1 | B8 | B16 | B32 | B64 | I8 | I16 | I32 | I64 | R32 | R64 => {
            Inst::load_constant(to_reg.unwrap(), value as u64)
        }
        I128 => Inst::load_constant128(to_regs, value),
        _ => panic!("Cannot generate constant for type: {}", ty),
    }
}
@@ -3202,6 +3207,8 @@ impl Inst {
ALUOp::Asr64 => ("asr", OperandSize::Size64),
ALUOp::Lsl32 => ("lsl", OperandSize::Size32),
ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
ALUOp::Adc32 => ("adc", OperandSize::Size32),
ALUOp::Adc64 => ("adc", OperandSize::Size64),
}
}

View File

@@ -64,11 +64,49 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
lower_constant_f64(ctx, rd, value);
}
Opcode::Iadd => {
    match ty.unwrap() {
        ty if ty.is_vector() => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            ctx.emit(Inst::VecRRR {
                rd,
                rn,
                rm,
                alu_op: VecALUOp::Add,
                size: VectorSize::from_ty(ty),
            });
        }
        I128 => {
            let lhs = put_input_in_regs(ctx, inputs[0]);
            let rhs = put_input_in_regs(ctx, inputs[1]);
            let dst = get_output_reg(ctx, outputs[0]);
            assert_eq!(lhs.len(), 2);
            assert_eq!(rhs.len(), 2);
            assert_eq!(dst.len(), 2);

            // adds x0, x0, x2
            // adc  x1, x1, x3

            // Add the lower halves, setting the carry flag.
            ctx.emit(Inst::AluRRR {
                alu_op: ALUOp::AddS64,
                rd: dst.regs()[0],
                rn: lhs.regs()[0],
                rm: rhs.regs()[0],
            });
            // Add the upper halves plus the carry from the lower add.
            ctx.emit(Inst::AluRRR {
                alu_op: ALUOp::Adc64,
                rd: dst.regs()[1],
                rn: lhs.regs()[1],
                rm: rhs.regs()[1],
            });
        }
        ty => {
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let mul_insn = if let Some(mul_insn) =
                maybe_input_insn(ctx, inputs[1], Opcode::Imul)
            {
                Some((mul_insn, 0))
            } else if let Some(mul_insn) = maybe_input_insn(ctx, inputs[0], Opcode::Imul) {
                Some((mul_insn, 1))
@@ -107,16 +145,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                };
                ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
            }
        }
    }
}
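
The emitted adds/adc pair is the standard 64-bit carry chain. A small Rust model of the same computation (ordinary u64 pairs standing in for the lo/hi register pairs; illustration only, not the lowering code itself) that reproduces two of the run-test cases added below:

// Illustration only: a Rust model of the adds/adc carry chain.
fn add_i128(lhs: (u64, u64), rhs: (u64, u64)) -> (u64, u64) {
    // adds x0, x0, x2: add the low halves and record the carry-out.
    let (lo, carry) = lhs.0.overflowing_add(rhs.0);
    // adc x1, x1, x3: add the high halves plus the carry.
    let hi = lhs.1.wrapping_add(rhs.1).wrapping_add(carry as u64);
    (lo, hi)
}

fn main() {
    // Mirrors `%add_i128(1, 0, -1, -1) == [0, 0]` from the new runtest:
    // 1 + (2^128 - 1) wraps around to 0.
    assert_eq!(add_i128((1, 0), (u64::MAX, u64::MAX)), (0, 0));
    // Mirrors `%add_i128(-1, 0, 1, 0) == [0, 1]`: the carry out of the low
    // half propagates into the high half.
    assert_eq!(add_i128((u64::MAX, 0), (1, 0)), (0, 1));
}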
Opcode::Isub => {

View File

@@ -0,0 +1,39 @@
test run
target aarch64
; i128 tests
; TODO: It would be nice if we had native support for i128 immediates in CLIF's parser
function %i128_const_0() -> i64, i64 {
block0:
v1 = iconst.i128 0
v2, v3 = isplit v1
return v2, v3
}
; run: %i128_const_0() == [0, 0]
; TODO: Blocked by https://github.com/bytecodealliance/wasmtime/issues/2906
;function %i128_const_neg_1() -> i64, i64 {
;block0:
; v1 = iconst.i128 -1
; v2, v3 = isplit v1
; return v2, v3
;}
; r-un: %i128_const_neg_1() == [0xffffffff_ffffffff, 0xffffffff_ffffffff]
function %add_i128(i64, i64, i64, i64) -> i64, i64 {
block0(v0: i64,v1: i64,v2: i64,v3: i64):
v4 = iconcat v0, v1
v5 = iconcat v2, v3
v6 = iadd v4, v5
v7, v8 = isplit v6
return v7, v8
}
; run: %add_i128(0, 0, 0, 0) == [0, 0]
; run: %add_i128(0, -1, -1, 0) == [-1, -1]
; run: %add_i128(1, 0, 0, 0) == [1, 0]
; run: %add_i128(1, 0, 1, 0) == [2, 0]
; run: %add_i128(1, 0, -1, -1) == [0, 0]
; run: %add_i128(-1, 0, 1, 0) == [0, 1]

View File

@@ -425,3 +425,18 @@ block0(v0: i8x16):
; nextln: ushl v0.16b, v0.16b, v1.16b
; nextln: ldp fp, lr, [sp], #16
; nextln: ret
function %add_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = iadd v0, v1
return v2
}
; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: adds x0, x0, x2
; nextln: adc x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret