arm64: Implement saturating SIMD arithmetic
Copyright (c) 2020, Arm Limited.
This commit is contained in:
2
build.rs
2
build.rs
@@ -189,8 +189,10 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
("simd", "simd_f64x2_cmp") => return false,
|
("simd", "simd_f64x2_cmp") => return false,
|
||||||
("simd", "simd_i8x16_arith") => return false,
|
("simd", "simd_i8x16_arith") => return false,
|
||||||
("simd", "simd_i8x16_cmp") => return false,
|
("simd", "simd_i8x16_cmp") => return false,
|
||||||
|
("simd", "simd_i8x16_sat_arith") => return false,
|
||||||
("simd", "simd_i16x8_arith") => return false,
|
("simd", "simd_i16x8_arith") => return false,
|
||||||
("simd", "simd_i16x8_cmp") => return false,
|
("simd", "simd_i16x8_cmp") => return false,
|
||||||
|
("simd", "simd_i16x8_sat_arith") => return false,
|
||||||
("simd", "simd_i32x4_arith") => return false,
|
("simd", "simd_i32x4_arith") => return false,
|
||||||
("simd", "simd_i32x4_cmp") => return false,
|
("simd", "simd_i32x4_cmp") => return false,
|
||||||
("simd", "simd_load_extend") => return false,
|
("simd", "simd_load_extend") => return false,
|
||||||
|
|||||||
@@ -1311,18 +1311,22 @@ impl MachInstEmit for Inst {
|
|||||||
debug_assert_eq!(I64, ty);
|
debug_assert_eq!(I64, ty);
|
||||||
(0b010_11110_11_1, 0b000011)
|
(0b010_11110_11_1, 0b000011)
|
||||||
}
|
}
|
||||||
|
VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011),
|
||||||
VecALUOp::SQSubScalar => {
|
VecALUOp::SQSubScalar => {
|
||||||
debug_assert_eq!(I64, ty);
|
debug_assert_eq!(I64, ty);
|
||||||
(0b010_11110_11_1, 0b001011)
|
(0b010_11110_11_1, 0b001011)
|
||||||
}
|
}
|
||||||
|
VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011),
|
||||||
VecALUOp::UQAddScalar => {
|
VecALUOp::UQAddScalar => {
|
||||||
debug_assert_eq!(I64, ty);
|
debug_assert_eq!(I64, ty);
|
||||||
(0b011_11110_11_1, 0b000011)
|
(0b011_11110_11_1, 0b000011)
|
||||||
}
|
}
|
||||||
|
VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011),
|
||||||
VecALUOp::UQSubScalar => {
|
VecALUOp::UQSubScalar => {
|
||||||
debug_assert_eq!(I64, ty);
|
debug_assert_eq!(I64, ty);
|
||||||
(0b011_11110_11_1, 0b001011)
|
(0b011_11110_11_1, 0b001011)
|
||||||
}
|
}
|
||||||
|
VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011),
|
||||||
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
|
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
|
||||||
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
|
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
|
||||||
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
|
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
|
||||||
|
|||||||
@@ -2049,6 +2049,198 @@ fn test_aarch64_binemit() {
|
|||||||
"sqsub d21, d22, d23",
|
"sqsub d21, d22, d23",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqadd,
|
||||||
|
rd: writable_vreg(1),
|
||||||
|
rn: vreg(2),
|
||||||
|
rm: vreg(8),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"410C284E",
|
||||||
|
"sqadd v1.16b, v2.16b, v8.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqadd,
|
||||||
|
rd: writable_vreg(1),
|
||||||
|
rn: vreg(12),
|
||||||
|
rm: vreg(28),
|
||||||
|
ty: I16X8,
|
||||||
|
},
|
||||||
|
"810D7C4E",
|
||||||
|
"sqadd v1.8h, v12.8h, v28.8h",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqadd,
|
||||||
|
rd: writable_vreg(12),
|
||||||
|
rn: vreg(2),
|
||||||
|
rm: vreg(6),
|
||||||
|
ty: I32X4,
|
||||||
|
},
|
||||||
|
"4C0CA64E",
|
||||||
|
"sqadd v12.4s, v2.4s, v6.4s",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqadd,
|
||||||
|
rd: writable_vreg(20),
|
||||||
|
rn: vreg(7),
|
||||||
|
rm: vreg(13),
|
||||||
|
ty: I64X2,
|
||||||
|
},
|
||||||
|
"F40CED4E",
|
||||||
|
"sqadd v20.2d, v7.2d, v13.2d",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqsub,
|
||||||
|
rd: writable_vreg(1),
|
||||||
|
rn: vreg(2),
|
||||||
|
rm: vreg(8),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"412C284E",
|
||||||
|
"sqsub v1.16b, v2.16b, v8.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqsub,
|
||||||
|
rd: writable_vreg(1),
|
||||||
|
rn: vreg(12),
|
||||||
|
rm: vreg(28),
|
||||||
|
ty: I16X8,
|
||||||
|
},
|
||||||
|
"812D7C4E",
|
||||||
|
"sqsub v1.8h, v12.8h, v28.8h",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqsub,
|
||||||
|
rd: writable_vreg(12),
|
||||||
|
rn: vreg(2),
|
||||||
|
rm: vreg(6),
|
||||||
|
ty: I32X4,
|
||||||
|
},
|
||||||
|
"4C2CA64E",
|
||||||
|
"sqsub v12.4s, v2.4s, v6.4s",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Sqsub,
|
||||||
|
rd: writable_vreg(20),
|
||||||
|
rn: vreg(7),
|
||||||
|
rm: vreg(13),
|
||||||
|
ty: I64X2,
|
||||||
|
},
|
||||||
|
"F42CED4E",
|
||||||
|
"sqsub v20.2d, v7.2d, v13.2d",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Uqadd,
|
||||||
|
rd: writable_vreg(1),
|
||||||
|
rn: vreg(2),
|
||||||
|
rm: vreg(8),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"410C286E",
|
||||||
|
"uqadd v1.16b, v2.16b, v8.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Uqadd,
|
||||||
|
rd: writable_vreg(1),
|
||||||
|
rn: vreg(12),
|
||||||
|
rm: vreg(28),
|
||||||
|
ty: I16X8,
|
||||||
|
},
|
||||||
|
"810D7C6E",
|
||||||
|
"uqadd v1.8h, v12.8h, v28.8h",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Uqadd,
|
||||||
|
rd: writable_vreg(12),
|
||||||
|
rn: vreg(2),
|
||||||
|
rm: vreg(6),
|
||||||
|
ty: I32X4,
|
||||||
|
},
|
||||||
|
"4C0CA66E",
|
||||||
|
"uqadd v12.4s, v2.4s, v6.4s",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Uqadd,
|
||||||
|
rd: writable_vreg(20),
|
||||||
|
rn: vreg(7),
|
||||||
|
rm: vreg(13),
|
||||||
|
ty: I64X2,
|
||||||
|
},
|
||||||
|
"F40CED6E",
|
||||||
|
"uqadd v20.2d, v7.2d, v13.2d",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Uqsub,
|
||||||
|
rd: writable_vreg(1),
|
||||||
|
rn: vreg(2),
|
||||||
|
rm: vreg(8),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"412C286E",
|
||||||
|
"uqsub v1.16b, v2.16b, v8.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Uqsub,
|
||||||
|
rd: writable_vreg(1),
|
||||||
|
rn: vreg(12),
|
||||||
|
rm: vreg(28),
|
||||||
|
ty: I16X8,
|
||||||
|
},
|
||||||
|
"812D7C6E",
|
||||||
|
"uqsub v1.8h, v12.8h, v28.8h",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Uqsub,
|
||||||
|
rd: writable_vreg(12),
|
||||||
|
rn: vreg(2),
|
||||||
|
rm: vreg(6),
|
||||||
|
ty: I32X4,
|
||||||
|
},
|
||||||
|
"4C2CA66E",
|
||||||
|
"uqsub v12.4s, v2.4s, v6.4s",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Uqsub,
|
||||||
|
rd: writable_vreg(20),
|
||||||
|
rn: vreg(7),
|
||||||
|
rm: vreg(13),
|
||||||
|
ty: I64X2,
|
||||||
|
},
|
||||||
|
"F42CED6E",
|
||||||
|
"uqsub v20.2d, v7.2d, v13.2d",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::VecRRR {
|
Inst::VecRRR {
|
||||||
alu_op: VecALUOp::Cmeq,
|
alu_op: VecALUOp::Cmeq,
|
||||||
|
|||||||
@@ -209,12 +209,16 @@ pub enum VecExtendOp {
|
|||||||
pub enum VecALUOp {
|
pub enum VecALUOp {
|
||||||
/// Signed saturating add
|
/// Signed saturating add
|
||||||
SQAddScalar,
|
SQAddScalar,
|
||||||
|
Sqadd,
|
||||||
/// Unsigned saturating add
|
/// Unsigned saturating add
|
||||||
UQAddScalar,
|
UQAddScalar,
|
||||||
|
Uqadd,
|
||||||
/// Signed saturating subtract
|
/// Signed saturating subtract
|
||||||
SQSubScalar,
|
SQSubScalar,
|
||||||
|
Sqsub,
|
||||||
/// Unsigned saturating subtract
|
/// Unsigned saturating subtract
|
||||||
UQSubScalar,
|
UQSubScalar,
|
||||||
|
Uqsub,
|
||||||
/// Compare bitwise equal
|
/// Compare bitwise equal
|
||||||
Cmeq,
|
Cmeq,
|
||||||
/// Compare signed greater than or equal
|
/// Compare signed greater than or equal
|
||||||
@@ -2734,9 +2738,13 @@ impl ShowWithRRU for Inst {
|
|||||||
} => {
|
} => {
|
||||||
let (op, vector, ty) = match alu_op {
|
let (op, vector, ty) = match alu_op {
|
||||||
VecALUOp::SQAddScalar => ("sqadd", false, ty),
|
VecALUOp::SQAddScalar => ("sqadd", false, ty),
|
||||||
|
VecALUOp::Sqadd => ("sqadd", true, ty),
|
||||||
VecALUOp::UQAddScalar => ("uqadd", false, ty),
|
VecALUOp::UQAddScalar => ("uqadd", false, ty),
|
||||||
|
VecALUOp::Uqadd => ("uqadd", true, ty),
|
||||||
VecALUOp::SQSubScalar => ("sqsub", false, ty),
|
VecALUOp::SQSubScalar => ("sqsub", false, ty),
|
||||||
|
VecALUOp::Sqsub => ("sqsub", true, ty),
|
||||||
VecALUOp::UQSubScalar => ("uqsub", false, ty),
|
VecALUOp::UQSubScalar => ("uqsub", false, ty),
|
||||||
|
VecALUOp::Uqsub => ("uqsub", true, ty),
|
||||||
VecALUOp::Cmeq => ("cmeq", true, ty),
|
VecALUOp::Cmeq => ("cmeq", true, ty),
|
||||||
VecALUOp::Cmge => ("cmge", true, ty),
|
VecALUOp::Cmge => ("cmge", true, ty),
|
||||||
VecALUOp::Cmgt => ("cmgt", true, ty),
|
VecALUOp::Cmgt => ("cmgt", true, ty),
|
||||||
|
|||||||
@@ -93,25 +93,29 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Opcode::UaddSat | Opcode::SaddSat => {
|
Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
|
||||||
// We use the vector instruction set's saturating adds (UQADD /
|
// We use the vector instruction set's saturating adds (UQADD /
|
||||||
// SQADD), which require vector registers.
|
// SQADD), which require vector registers.
|
||||||
let is_signed = op == Opcode::SaddSat;
|
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
|
if ty_bits(ty) < 128 {
|
||||||
let narrow_mode = if is_signed {
|
let narrow_mode = if is_signed {
|
||||||
NarrowValueMode::SignExtend64
|
NarrowValueMode::SignExtend64
|
||||||
} else {
|
} else {
|
||||||
NarrowValueMode::ZeroExtend64
|
NarrowValueMode::ZeroExtend64
|
||||||
};
|
};
|
||||||
let alu_op = if is_signed {
|
let alu_op = match op {
|
||||||
VecALUOp::SQAddScalar
|
Opcode::UaddSat => VecALUOp::UQAddScalar,
|
||||||
} else {
|
Opcode::SaddSat => VecALUOp::SQAddScalar,
|
||||||
VecALUOp::UQAddScalar
|
Opcode::UsubSat => VecALUOp::UQSubScalar,
|
||||||
|
Opcode::SsubSat => VecALUOp::SQSubScalar,
|
||||||
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||||
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||||
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
||||||
let rd = get_output_reg(ctx, outputs[0]);
|
|
||||||
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
|
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
|
||||||
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
|
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
|
||||||
ctx.emit(Inst::VecRRR {
|
ctx.emit(Inst::VecRRR {
|
||||||
@@ -127,40 +131,26 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
idx: 0,
|
idx: 0,
|
||||||
ty: I64,
|
ty: I64,
|
||||||
});
|
});
|
||||||
|
} else {
|
||||||
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
|
|
||||||
|
let alu_op = match op {
|
||||||
|
Opcode::UaddSat => VecALUOp::Uqadd,
|
||||||
|
Opcode::SaddSat => VecALUOp::Sqadd,
|
||||||
|
Opcode::UsubSat => VecALUOp::Uqsub,
|
||||||
|
Opcode::SsubSat => VecALUOp::Sqsub,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
ctx.emit(Inst::VecRRR {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
alu_op,
|
||||||
|
ty,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::UsubSat | Opcode::SsubSat => {
|
|
||||||
let is_signed = op == Opcode::SsubSat;
|
|
||||||
let narrow_mode = if is_signed {
|
|
||||||
NarrowValueMode::SignExtend64
|
|
||||||
} else {
|
|
||||||
NarrowValueMode::ZeroExtend64
|
|
||||||
};
|
|
||||||
let alu_op = if is_signed {
|
|
||||||
VecALUOp::SQSubScalar
|
|
||||||
} else {
|
|
||||||
VecALUOp::UQSubScalar
|
|
||||||
};
|
|
||||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
|
||||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
|
||||||
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
|
||||||
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
|
||||||
let rd = get_output_reg(ctx, outputs[0]);
|
|
||||||
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
|
|
||||||
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
|
|
||||||
ctx.emit(Inst::VecRRR {
|
|
||||||
rd: va,
|
|
||||||
rn: va.to_reg(),
|
|
||||||
rm: vb.to_reg(),
|
|
||||||
alu_op,
|
|
||||||
ty: I64,
|
|
||||||
});
|
|
||||||
ctx.emit(Inst::MovFromVec {
|
|
||||||
rd,
|
|
||||||
rn: va.to_reg(),
|
|
||||||
idx: 0,
|
|
||||||
ty: I64,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Ineg => {
|
Opcode::Ineg => {
|
||||||
|
|||||||
Reference in New Issue
Block a user