AArch64: Implement SIMD floating-point arithmetic

Copyright (c) 2020, Arm Limited.
This commit is contained in:
Anton Kirilov
2020-07-24 11:50:50 +01:00
parent 2c1d370465
commit adf25d27c2
5 changed files with 248 additions and 38 deletions

View File

@@ -1123,6 +1123,18 @@ impl MachInstEmit for Inst {
VecMisc2::Not => (0b1, 0b00101, 0b00),
VecMisc2::Neg => (0b1, 0b01011, enc_size),
VecMisc2::Abs => (0b0, 0b01011, enc_size),
VecMisc2::Fabs => {
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
(0b0, 0b01111, enc_size)
}
VecMisc2::Fneg => {
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
(0b1, 0b01111, enc_size)
}
VecMisc2::Fsqrt => {
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
(0b1, 0b11111, enc_size)
}
};
sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn));
}
@@ -1363,9 +1375,22 @@ impl MachInstEmit for Inst {
VectorSize::Size64x2 => 0b11,
_ => 0,
};
let enc_size_for_fcmp = match size {
VectorSize::Size32x4 => 0b0,
VectorSize::Size64x2 => 0b1,
let is_float = match alu_op {
VecALUOp::Fcmeq
| VecALUOp::Fcmgt
| VecALUOp::Fcmge
| VecALUOp::Fadd
| VecALUOp::Fsub
| VecALUOp::Fdiv
| VecALUOp::Fmax
| VecALUOp::Fmin
| VecALUOp::Fmul => true,
_ => false,
};
let enc_float_size = match (is_float, size) {
(true, VectorSize::Size32x4) => 0b0,
(true, VectorSize::Size64x2) => 0b1,
(true, _) => unimplemented!(),
_ => 0,
};
@@ -1379,9 +1404,9 @@ impl MachInstEmit for Inst {
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size << 1, 0b001101),
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size << 1, 0b001111),
VecALUOp::Fcmeq => (0b010_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
VecALUOp::Fcmgt => (0b011_01110_10_1 | enc_size_for_fcmp << 1, 0b111001),
VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
VecALUOp::Fcmeq => (0b010_01110_00_1, 0b111001),
VecALUOp::Fcmgt => (0b011_01110_10_1, 0b111001),
VecALUOp::Fcmge => (0b011_01110_00_1, 0b111001),
// The following logical instructions operate on bytes, so are not encoded differently
// for the different vector types.
VecALUOp::And => (0b010_01110_00_1, 0b000111),
@@ -1403,6 +1428,17 @@ impl MachInstEmit for Inst {
VecALUOp::Umax => (0b011_01110_00_1 | enc_size << 1, 0b011001),
VecALUOp::Smax => (0b010_01110_00_1 | enc_size << 1, 0b011001),
VecALUOp::Urhadd => (0b011_01110_00_1 | enc_size << 1, 0b000101),
VecALUOp::Fadd => (0b010_01110_00_1, 0b110101),
VecALUOp::Fsub => (0b010_01110_10_1, 0b110101),
VecALUOp::Fdiv => (0b011_01110_00_1, 0b111111),
VecALUOp::Fmax => (0b010_01110_00_1, 0b111101),
VecALUOp::Fmin => (0b010_01110_10_1, 0b111101),
VecALUOp::Fmul => (0b011_01110_00_1, 0b110111),
};
let top11 = if is_float {
top11 | enc_float_size << 1
} else {
top11
};
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
}

View File

@@ -2953,6 +2953,78 @@ fn test_aarch64_binemit() {
"urhadd v8.4s, v12.4s, v14.4s",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fadd,
rd: writable_vreg(31),
rn: vreg(0),
rm: vreg(16),
size: VectorSize::Size32x4,
},
"1FD4304E",
"fadd v31.4s, v0.4s, v16.4s",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fsub,
rd: writable_vreg(8),
rn: vreg(7),
rm: vreg(15),
size: VectorSize::Size64x2,
},
"E8D4EF4E",
"fsub v8.2d, v7.2d, v15.2d",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fdiv,
rd: writable_vreg(1),
rn: vreg(3),
rm: vreg(4),
size: VectorSize::Size32x4,
},
"61FC246E",
"fdiv v1.4s, v3.4s, v4.4s",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fmax,
rd: writable_vreg(31),
rn: vreg(16),
rm: vreg(0),
size: VectorSize::Size64x2,
},
"1FF6604E",
"fmax v31.2d, v16.2d, v0.2d",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fmin,
rd: writable_vreg(5),
rn: vreg(19),
rm: vreg(26),
size: VectorSize::Size32x4,
},
"65F6BA4E",
"fmin v5.4s, v19.4s, v26.4s",
));
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Fmul,
rd: writable_vreg(2),
rn: vreg(0),
rm: vreg(5),
size: VectorSize::Size64x2,
},
"02DC656E",
"fmul v2.2d, v0.2d, v5.2d",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Not,
@@ -3052,6 +3124,39 @@ fn test_aarch64_binemit() {
"abs v1.2d, v10.2d",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fabs,
rd: writable_vreg(15),
rn: vreg(16),
size: VectorSize::Size32x4,
},
"0FFAA04E",
"fabs v15.4s, v16.4s",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fneg,
rd: writable_vreg(31),
rn: vreg(0),
size: VectorSize::Size32x4,
},
"1FF8A06E",
"fneg v31.4s, v0.4s",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Fsqrt,
rd: writable_vreg(7),
rn: vreg(18),
size: VectorSize::Size64x2,
},
"47FAE16E",
"fsqrt v7.2d, v18.2d",
));
insns.push((
Inst::VecLanes {
op: VecLanesOp::Uminv,

View File

@@ -271,6 +271,18 @@ pub enum VecALUOp {
Smax,
/// Unsigned rounding halving add
Urhadd,
/// Floating-point add
Fadd,
/// Floating-point subtract
Fsub,
/// Floating-point divide
Fdiv,
/// Floating-point maximum
Fmax,
/// Floating-point minimum
Fmin,
/// Floating-point multiply
Fmul,
}
/// A Vector miscellaneous operation with two registers.
@@ -282,6 +294,12 @@ pub enum VecMisc2 {
Neg,
/// Absolute value
Abs,
/// Floating-point absolute value
Fabs,
/// Floating-point negate
Fneg,
/// Floating-point square root
Fsqrt,
}
/// An operation across the lanes of vectors.
@@ -2810,6 +2828,12 @@ impl Inst {
VecALUOp::Umax => ("umax", size),
VecALUOp::Smax => ("smax", size),
VecALUOp::Urhadd => ("urhadd", size),
VecALUOp::Fadd => ("fadd", size),
VecALUOp::Fsub => ("fsub", size),
VecALUOp::Fdiv => ("fdiv", size),
VecALUOp::Fmax => ("fmax", size),
VecALUOp::Fmin => ("fmin", size),
VecALUOp::Fmul => ("fmul", size),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
let rn = show_vreg_vector(rn, mb_rru, size);
@@ -2821,6 +2845,9 @@ impl Inst {
VecMisc2::Not => ("mvn", VectorSize::Size8x16),
VecMisc2::Neg => ("neg", size),
VecMisc2::Abs => ("abs", size),
VecMisc2::Fabs => ("fabs", size),
VecMisc2::Fneg => ("fneg", size),
VecMisc2::Fsqrt => ("fsqrt", size),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);