Port AvgRound & SqmulRoundSat to ISLE (AArch64) (#4639)
Ported the existing implementations of the following opcodes on AArch64 to ISLE:

- `AvgRound`
  - Also introduced support for `i64x2` vectors, as per the docs.
- `SqmulRoundSat`

Copyright (c) 2022 Arm Limited
@@ -592,6 +592,8 @@ fn define_simd_arithmetic(
             "avg_round",
             r#"
         Unsigned average with rounding: `a := (x + y + 1) // 2`
+
+        The addition does not lose any information (such as from overflow).
         "#,
             &formats.binary,
         )
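For reference, the documented semantics `a := (x + y + 1) // 2` can be sketched per lane in scalar Rust, widening so the intermediate addition cannot overflow; `avg_round_u8` is a hypothetical name used only for this illustration, not part of the patch:

```rust
/// Reference semantics of `avg_round` for a single `u8` lane: `(x + y + 1) // 2`,
/// computed in a wider type so the intermediate addition cannot overflow.
fn avg_round_u8(x: u8, y: u8) -> u8 {
    ((x as u16 + y as u16 + 1) / 2) as u8
}

fn main() {
    // 0xff averaged with 0 rounds up to 0x80, matching the run tests later in this diff.
    assert_eq!(avg_round_u8(0xff, 0), 0x80);
    assert_eq!(avg_round_u8(19, 18), 19);
}
```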
@@ -1551,6 +1551,13 @@
         (_ Unit (emit (MInst.VecLanes op dst src size))))
     dst))

+;; Helper for emitting `MInst.VecShiftImm` instructions.
+(decl vec_shift_imm (VecShiftImmOp u8 Reg VectorSize) Reg)
+(rule (vec_shift_imm op imm src size)
+  (let ((dst WritableReg (temp_writable_reg $I8X16))
+        (_ Unit (emit (MInst.VecShiftImm op dst src size imm))))
+    dst))
+
 ;; Helper for emitting `MInst.VecDup` instructions.
 (decl vec_dup (Reg VectorSize) Reg)
 (rule (vec_dup src size)
@@ -1976,31 +1976,34 @@ impl MachInstEmit for Inst {
             } => {
                 let rd = allocs.next_writable(rd);
                 let rn = allocs.next(rn);
-                let (is_shr, template) = match op {
-                    VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32),
-                    VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32),
-                    VecShiftImmOp::Shl => (false, 0b_010_011110_0000_000_010101_00000_00000_u32),
+                let (is_shr, mut template) = match op {
+                    VecShiftImmOp::Ushr => (true, 0b_001_011110_0000_000_000001_00000_00000_u32),
+                    VecShiftImmOp::Sshr => (true, 0b_000_011110_0000_000_000001_00000_00000_u32),
+                    VecShiftImmOp::Shl => (false, 0b_000_011110_0000_000_010101_00000_00000_u32),
                 };
+                if size.is_128bits() {
+                    template |= 0b1 << 30;
+                }
                 let imm = imm as u32;
                 // Deal with the somewhat strange encoding scheme for, and limits on,
                 // the shift amount.
-                let immh_immb = match (size, is_shr) {
-                    (VectorSize::Size64x2, true) if imm >= 1 && imm <= 64 => {
+                let immh_immb = match (size.lane_size(), is_shr) {
+                    (ScalarSize::Size64, true) if imm >= 1 && imm <= 64 => {
                         0b_1000_000_u32 | (64 - imm)
                     }
-                    (VectorSize::Size32x4, true) if imm >= 1 && imm <= 32 => {
+                    (ScalarSize::Size32, true) if imm >= 1 && imm <= 32 => {
                         0b_0100_000_u32 | (32 - imm)
                     }
-                    (VectorSize::Size16x8, true) if imm >= 1 && imm <= 16 => {
+                    (ScalarSize::Size16, true) if imm >= 1 && imm <= 16 => {
                         0b_0010_000_u32 | (16 - imm)
                     }
-                    (VectorSize::Size8x16, true) if imm >= 1 && imm <= 8 => {
+                    (ScalarSize::Size8, true) if imm >= 1 && imm <= 8 => {
                         0b_0001_000_u32 | (8 - imm)
                     }
-                    (VectorSize::Size64x2, false) if imm <= 63 => 0b_1000_000_u32 | imm,
-                    (VectorSize::Size32x4, false) if imm <= 31 => 0b_0100_000_u32 | imm,
-                    (VectorSize::Size16x8, false) if imm <= 15 => 0b_0010_000_u32 | imm,
-                    (VectorSize::Size8x16, false) if imm <= 7 => 0b_0001_000_u32 | imm,
+                    (ScalarSize::Size64, false) if imm <= 63 => 0b_1000_000_u32 | imm,
+                    (ScalarSize::Size32, false) if imm <= 31 => 0b_0100_000_u32 | imm,
+                    (ScalarSize::Size16, false) if imm <= 15 => 0b_0010_000_u32 | imm,
+                    (ScalarSize::Size8, false) if imm <= 7 => 0b_0001_000_u32 | imm,
                     _ => panic!(
                         "aarch64: Inst::VecShiftImm: emit: invalid op/size/imm {:?}, {:?}, {:?}",
                         op, size, imm
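The "somewhat strange" immh:immb encoding above boils down to a simple rule: right shifts encode `2 * lane_bits - shift` (so the shift must lie in `1..=lane_bits`), left shifts encode `lane_bits + shift` (with `shift < lane_bits`), and the new `if size.is_128bits()` branch sets the Q bit (bit 30) for the 128-bit register forms. A standalone sketch of that computation, with a hypothetical `immh_immb` helper rather than the emitter itself:

```rust
/// Sketch of the immh:immb computation: right shifts encode `2 * lane_bits - shift`,
/// left shifts encode `lane_bits + shift`; returns `None` for unencodable amounts.
fn immh_immb(lane_bits: u32, shift: u32, is_shr: bool) -> Option<u32> {
    if is_shr {
        (1..=lane_bits).contains(&shift).then(|| 2 * lane_bits - shift)
    } else {
        (shift < lane_bits).then(|| lane_bits + shift)
    }
}

fn main() {
    // `ushr v25.2d, v6.2d, #64` from the tests: 2 * 64 - 64 = 0b_1000_000.
    assert_eq!(immh_immb(64, 64, true), Some(0b_1000_000));
    // `ushr v5.4h, v21.4h, #1`: 2 * 16 - 1 = 31, i.e. 0b_0010_000 | 15.
    assert_eq!(immh_immb(16, 1, true), Some(31));
    // A right shift by 0 is not encodable.
    assert_eq!(immh_immb(32, 0, true), None);
}
```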
@@ -3946,6 +3946,18 @@ fn test_aarch64_binemit() {
         "smax v8.4s, v12.4s, v14.4s",
     ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Urhadd,
+            rd: writable_vreg(8),
+            rn: vreg(1),
+            rm: vreg(3),
+            size: VectorSize::Size8x8,
+        },
+        "2814232E",
+        "urhadd v8.8b, v1.8b, v3.8b",
+    ));

     insns.push((
         Inst::VecRRR {
             alu_op: VecALUOp::Urhadd,
@@ -3958,6 +3970,18 @@ fn test_aarch64_binemit() {
         "urhadd v8.16b, v1.16b, v3.16b",
     ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Urhadd,
+            rd: writable_vreg(2),
+            rn: vreg(13),
+            rm: vreg(6),
+            size: VectorSize::Size16x4,
+        },
+        "A215662E",
+        "urhadd v2.4h, v13.4h, v6.4h",
+    ));

     insns.push((
         Inst::VecRRR {
             alu_op: VecALUOp::Urhadd,
@@ -3970,6 +3994,18 @@ fn test_aarch64_binemit() {
         "urhadd v2.8h, v13.8h, v6.8h",
     ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Urhadd,
+            rd: writable_vreg(8),
+            rn: vreg(12),
+            rm: vreg(14),
+            size: VectorSize::Size32x2,
+        },
+        "8815AE2E",
+        "urhadd v8.2s, v12.2s, v14.2s",
+    ));

     insns.push((
         Inst::VecRRR {
             alu_op: VecALUOp::Urhadd,
@@ -5123,6 +5159,126 @@ fn test_aarch64_binemit() {
         "sshr v3.8h, v19.8h, #1",
     ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(25),
+            rn: vreg(6),
+            imm: 8,
+            size: VectorSize::Size8x8,
+        },
+        "D904082F",
+        "ushr v25.8b, v6.8b, #8",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(5),
+            rn: vreg(21),
+            imm: 1,
+            size: VectorSize::Size8x8,
+        },
+        "A5060F2F",
+        "ushr v5.8b, v21.8b, #1",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(25),
+            rn: vreg(6),
+            imm: 8,
+            size: VectorSize::Size8x16,
+        },
+        "D904086F",
+        "ushr v25.16b, v6.16b, #8",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(5),
+            rn: vreg(21),
+            imm: 1,
+            size: VectorSize::Size8x16,
+        },
+        "A5060F6F",
+        "ushr v5.16b, v21.16b, #1",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(25),
+            rn: vreg(6),
+            imm: 16,
+            size: VectorSize::Size16x4,
+        },
+        "D904102F",
+        "ushr v25.4h, v6.4h, #16",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(5),
+            rn: vreg(21),
+            imm: 1,
+            size: VectorSize::Size16x4,
+        },
+        "A5061F2F",
+        "ushr v5.4h, v21.4h, #1",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(25),
+            rn: vreg(6),
+            imm: 16,
+            size: VectorSize::Size16x8,
+        },
+        "D904106F",
+        "ushr v25.8h, v6.8h, #16",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(5),
+            rn: vreg(21),
+            imm: 1,
+            size: VectorSize::Size16x8,
+        },
+        "A5061F6F",
+        "ushr v5.8h, v21.8h, #1",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(25),
+            rn: vreg(6),
+            imm: 32,
+            size: VectorSize::Size32x2,
+        },
+        "D904202F",
+        "ushr v25.2s, v6.2s, #32",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(5),
+            rn: vreg(21),
+            imm: 1,
+            size: VectorSize::Size32x2,
+        },
+        "A5063F2F",
+        "ushr v5.2s, v21.2s, #1",
+    ));

     insns.push((
         Inst::VecShiftImm {
             op: VecShiftImmOp::Ushr,
@@ -5147,6 +5303,30 @@ fn test_aarch64_binemit() {
         "ushr v5.4s, v21.4s, #1",
     ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(25),
+            rn: vreg(6),
+            imm: 64,
+            size: VectorSize::Size64x2,
+        },
+        "D904406F",
+        "ushr v25.2d, v6.2d, #64",
+    ));
+
+    insns.push((
+        Inst::VecShiftImm {
+            op: VecShiftImmOp::Ushr,
+            rd: writable_vreg(5),
+            rn: vreg(21),
+            imm: 1,
+            size: VectorSize::Size64x2,
+        },
+        "A5067F6F",
+        "ushr v5.2d, v21.2d, #1",
+    ));

     insns.push((
         Inst::VecShiftImm {
             op: VecShiftImmOp::Shl,
@@ -233,6 +233,27 @@
 (rule (lower (has_type (fits_in_32 ty) (iabs x)))
       (abs (OperandSize.Size32) (put_in_reg_sext32 x)))

+;;;; Rules for `avg_round` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $I64X2 (avg_round x y)))
+      (let ((one Reg (splat_const 1 (VectorSize.Size64x2)))
+            (c Reg (orr_vec x y (VectorSize.Size64x2)))
+            (c Reg (and_vec c one (VectorSize.Size64x2)))
+            (x Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 x
+                                  (VectorSize.Size64x2)))
+            (y Reg (vec_shift_imm (VecShiftImmOp.Ushr) 1 y
+                                  (VectorSize.Size64x2)))
+            (sum Reg (add_vec x y (VectorSize.Size64x2))))
+        (add_vec c sum (VectorSize.Size64x2))))
+
+(rule (lower (has_type (lane_fits_in_32 ty) (avg_round x y)))
+      (vec_rrr (VecALUOp.Urhadd) x y (vector_size ty)))
+
+;;;; Rules for `sqmul_round_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (sqmul_round_sat x y)))
+      (vec_rrr (VecALUOp.Sqrdmulh) x y (vector_size ty)))
+
 ;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (rule (lower (has_type ty @ (multi_lane _ _) (fadd rn rm)))
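AArch64 has no 64-bit `urhadd`, so the `$I64X2` rule above rewrites the rounding average as `((x | y) & 1) + (x >> 1) + (y >> 1)`, which for unsigned lanes equals `(x + y + 1) >> 1` without ever forming the 65-bit sum; the `(x | y) & 1` term supplies the rounding bit. A scalar sanity check of that identity (a sketch, not project code):

```rust
/// The decomposition used by the `$I64X2` rule: for unsigned 64-bit lanes,
/// `(x + y + 1) >> 1` equals `((x | y) & 1) + (x >> 1) + (y >> 1)`, and the
/// right-hand side never overflows 64 bits.
fn avg_round_u64(x: u64, y: u64) -> u64 {
    ((x | y) & 1) + (x >> 1) + (y >> 1)
}

fn main() {
    // Compare against a widened reference computation of `(x + y + 1) / 2`.
    let reference = |x: u64, y: u64| ((x as u128 + y as u128 + 1) / 2) as u64;
    for &(x, y) in &[(0, 1), (42, 18), (u64::MAX, 0), (u64::MAX, u64::MAX - 2)] {
        assert_eq!(avg_round_u64(x, y), reference(x, y));
    }
}
```

The lowered `i64x2` sequence in the precise-output test further down (an `orr`/`and` pair for the rounding bit, two `ushr`s for the halved operands, then two `add`s) corresponds term-for-term to this identity.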
@@ -1502,27 +1502,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }

         Opcode::Iabs => implemented_in_isle(ctx),
-        Opcode::AvgRound => {
-            let ty = ty.unwrap();
-
-            if ty.lane_bits() == 64 {
-                return Err(CodegenError::Unsupported(format!(
-                    "AvgRound: Unsupported type: {:?}",
-                    ty
-                )));
-            }
-
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-            ctx.emit(Inst::VecRRR {
-                alu_op: VecALUOp::Urhadd,
-                rd,
-                rn,
-                rm,
-                size: VectorSize::from_ty(ty),
-            });
-        }
+        Opcode::AvgRound => implemented_in_isle(ctx),

         Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => implemented_in_isle(ctx),

@@ -1583,28 +1563,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             }
         },

-        Opcode::SqmulRoundSat => {
-            let ty = ty.unwrap();
-
-            if !ty.is_vector() || (ty.lane_type() != I16 && ty.lane_type() != I32) {
-                return Err(CodegenError::Unsupported(format!(
-                    "SqmulRoundSat: Unsupported type: {:?}",
-                    ty
-                )));
-            }
-
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-
-            ctx.emit(Inst::VecRRR {
-                alu_op: VecALUOp::Sqrdmulh,
-                rd,
-                rn,
-                rm,
-                size: VectorSize::from_ty(ty),
-            });
-        }
+        Opcode::SqmulRoundSat => implemented_in_isle(ctx),

         Opcode::FcvtLowFromSint => {
             let ty = ty.unwrap();
@@ -0,0 +1,81 @@
+test compile precise-output
+set unwind_info=false
+target aarch64
+
+function %average_rounding_i8x8(i8x8, i8x8) -> i8x8 {
+block0(v0: i8x8, v1: i8x8):
+    v2 = avg_round v0, v1
+    return v2
+}
+
+; block0:
+; urhadd v0.8b, v0.8b, v1.8b
+; ret
+
+function %average_rounding_i8x16(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = avg_round v0, v1
+    return v2
+}
+
+; block0:
+; urhadd v0.16b, v0.16b, v1.16b
+; ret
+
+function %average_rounding_i16x4(i16x4, i16x4) -> i16x4 {
+block0(v0: i16x4, v1: i16x4):
+    v2 = avg_round v0, v1
+    return v2
+}
+
+; block0:
+; urhadd v0.4h, v0.4h, v1.4h
+; ret
+
+function %average_rounding_i16x8(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = avg_round v0, v1
+    return v2
+}
+
+; block0:
+; urhadd v0.8h, v0.8h, v1.8h
+; ret
+
+function %average_rounding_i32x2(i32x2, i32x2) -> i32x2 {
+block0(v0: i32x2, v1: i32x2):
+    v2 = avg_round v0, v1
+    return v2
+}
+
+; block0:
+; urhadd v0.2s, v0.2s, v1.2s
+; ret
+
+function %average_rounding_i32x4(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = avg_round v0, v1
+    return v2
+}
+
+; block0:
+; urhadd v0.4s, v0.4s, v1.4s
+; ret
+
+function %average_rounding_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = avg_round v0, v1
+    return v2
+}
+
+; block0:
+; movz x6, #1
+; dup v6.2d, x6
+; orr v17.16b, v0.16b, v1.16b
+; and v19.16b, v17.16b, v6.16b
+; ushr v21.2d, v0.2d, #1
+; ushr v23.2d, v1.2d, #1
+; add v25.2d, v21.2d, v23.2d
+; add v0.2d, v19.2d, v25.2d
+; ret
+
@@ -1,3 +1,5 @@
+; the interpreter does not currently support some of these instructions
+; such as `avg_round` on SIMD values.
 test run
 target aarch64
 target s390x
@@ -172,6 +174,13 @@ block0(v0: f32x4):
 }
 ; run: %fabs_f32x4([0x0.0 -0x1.0 0x2.0 -0x3.0]) == [0x0.0 0x1.0 0x2.0 0x3.0]

+function %average_rounding_i8x16(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = avg_round v0, v1
+    return v2
+}
+; run: %average_rounding_i8x16([0 0 0 1 42 19 -1 0xff 5 0 0 0 1 42 19 -1], [0 1 2 4 42 18 -1 0 10 0 1 2 4 42 18 -1]) == [0 1 1 3 42 19 -1 0x80 8 0 1 1 3 42 19 -1]
+
 function %average_rounding_i16x8(i16x8, i16x8) -> i16x8 {
 block0(v0: i16x8, v1: i16x8):
     v2 = avg_round v0, v1
cranelift/filetests/filetests/runtests/simd-avg-round.clif (new file, 51 lines)
@@ -0,0 +1,51 @@
+; the interpreter does not currently support SIMD `avg_round`.
+test run
+target aarch64
+; x86_64 and s390x do not currently support 64-bit vectors, or
+; `avg_round` on `i64x2` values.
+; x86_64 also does not currently support `avg_round.i32x4`.
+
+function %average_rounding_i8x8(i8x8, i8x8) -> i8x8 {
+block0(v0: i8x8, v1: i8x8):
+    v2 = avg_round v0, v1
+    return v2
+}
+; run: %average_rounding_i8x8([0 0 0 1 42 19 -1 0xff], [0 1 2 4 42 18 -1 0]) == [0 1 1 3 42 19 -1 0x80]
+
+function %average_rounding_i16x4(i16x4, i16x4) -> i16x4 {
+block0(v0: i16x4, v1: i16x4):
+    v2 = avg_round v0, v1
+    return v2
+}
+; run: %average_rounding_i16x4([0 0 0 1], [0 1 2 4]) == [0 1 1 3]
+; run: %average_rounding_i16x4([42 19 -1 0xffff], [42 18 -1 0]) == [42 19 -1 0x8000]
+
+function %average_rounding_i32x2(i32x2, i32x2) -> i32x2 {
+block0(v0: i32x2, v1: i32x2):
+    v2 = avg_round v0, v1
+    return v2
+}
+; run: %average_rounding_i32x2([0 0], [0 1]) == [0 1]
+; run: %average_rounding_i32x2([0 1], [2 4]) == [1 3]
+; run: %average_rounding_i32x2([42 19], [42 18]) == [42 19]
+; run: %average_rounding_i32x2([-1 0xffffffff], [-1 0]) == [-1 0x80000000]
+; run: %average_rounding_i32x2([0xffffffff 0xfffffffd], [10 0xffffffff]) == [0x80000005 0xfffffffe]
+
+function %average_rounding_i32x4(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = avg_round v0, v1
+    return v2
+}
+; run: %average_rounding_i32x4([0 0 0 0xffffffff], [0 1 2 0]) == [0 1 1 0x80000000]
+; run: %average_rounding_i32x4([1 42 19 -1], [4 42 18 -1]) == [3 42 19 -1]
+
+function %average_rounding_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = avg_round v0, v1
+    return v2
+}
+; run: %average_rounding_i64x2([0 0], [0 1]) == [0 1]
+; run: %average_rounding_i64x2([0 1], [2 4]) == [1 3]
+; run: %average_rounding_i64x2([42 19], [42 18]) == [42 19]
+; run: %average_rounding_i64x2([-1 0xffffffffffffffff], [-1 0]) == [-1 0x8000000000000000]
+; run: %average_rounding_i64x2([0xffffffffffffffff 0xfffffffffffffffd], [10 0xffffffffffffffff]) == [0x8000000000000005 0xfffffffffffffffe]
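The wrap-around rows in these run tests follow directly from the unsigned `(x + y + 1) // 2` definition; for instance, the last two `i32x2` expectations can be spot-checked with a widened scalar computation (a sketch using a hypothetical helper, not part of the test suite):

```rust
/// Widened scalar computation of `(x + y + 1) // 2` for one `u32` lane,
/// used here only to spot-check the run expectations above.
fn avg_round_u32(x: u32, y: u32) -> u32 {
    ((x as u64 + y as u64 + 1) / 2) as u32
}

fn main() {
    // ; run: %average_rounding_i32x2([0xffffffff 0xfffffffd], [10 0xffffffff])
    // ;      == [0x80000005 0xfffffffe]
    assert_eq!(avg_round_u32(0xffff_ffff, 10), 0x8000_0005);
    assert_eq!(avg_round_u32(0xffff_fffd, 0xffff_ffff), 0xffff_fffe);
}
```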