Merge pull request #3361 from dheaton-arm/implement-vecops
Implement `VhighBits` & `Vselect` for interpreter
This commit is contained in:
53
cranelift/filetests/filetests/runtests/simd-vhighbits.clif
Normal file
53
cranelift/filetests/filetests/runtests/simd-vhighbits.clif
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
test interpret
|
||||||
|
test run
|
||||||
|
target aarch64
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 machinst
|
||||||
|
|
||||||
|
function %vhighbits_i8x16(i8x16) -> i16 {
|
||||||
|
block0(v0: i8x16):
|
||||||
|
v1 = vhigh_bits.i16 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %vhighbits_i8x16([0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]) == 0
|
||||||
|
; run: %vhighbits_i8x16([0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]) == 0
|
||||||
|
; run: %vhighbits_i8x16([1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7]) == 0
|
||||||
|
; run: %vhighbits_i8x16([128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 128]) == -1
|
||||||
|
; run: %vhighbits_i8x16([128 128 128 128 128 128 128 128 128 128 128 128 128 128 128 8]) == 32767
|
||||||
|
|
||||||
|
function %vhighbits_i16x8(i16x8) -> i8 {
|
||||||
|
block0(v0: i16x8):
|
||||||
|
v1 = vhigh_bits.i8 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %vhighbits_i16x8([0 0 0 0 0 0 0 0]) == 0
|
||||||
|
; run: %vhighbits_i16x8([0 0 0 0 0 0 0 1]) == 0
|
||||||
|
; run: %vhighbits_i16x8([1 2 3 4 5 6 7 8]) == 0
|
||||||
|
; run: %vhighbits_i16x8([128 128 128 128 128 128 128 128]) == 0
|
||||||
|
; run: %vhighbits_i16x8([32768 32768 32768 32768 32768 32768 32768 0]) == 127
|
||||||
|
|
||||||
|
|
||||||
|
function %vhighbits_i32x4(i32x4) -> i8 {
|
||||||
|
block0(v0: i32x4):
|
||||||
|
v1 = vhigh_bits.i8 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %vhighbits_i32x4([0 0 0 0]) == 0
|
||||||
|
; run: %vhighbits_i32x4([0 0 0 1]) == 0
|
||||||
|
; run: %vhighbits_i32x4([1 2 3 4]) == 0
|
||||||
|
; run: %vhighbits_i32x4([128 128 128 128]) == 0
|
||||||
|
; run: %vhighbits_i32x4([2147483648 2147483648 2147483648 2147483648]) == 15
|
||||||
|
; run: %vhighbits_i32x4([2147483648 0 2147483648 0]) == 5
|
||||||
|
|
||||||
|
|
||||||
|
function %vhighbits_i64x2(i64x2) -> i8 {
|
||||||
|
block0(v0: i64x2):
|
||||||
|
v1 = vhigh_bits.i8 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %vhighbits_i64x2([0 0]) == 0
|
||||||
|
; run: %vhighbits_i64x2([0 1]) == 0
|
||||||
|
; run: %vhighbits_i64x2([1 2]) == 0
|
||||||
|
; run: %vhighbits_i64x2([128 128]) == 0
|
||||||
|
; run: %vhighbits_i64x2([18446744073709551615 18446744073709551615]) == 3
|
||||||
|
; run: %vhighbits_i64x2([18446744073709551615 0]) == 1
|
||||||
@@ -1,10 +1,9 @@
|
|||||||
|
test interpret
|
||||||
test run
|
test run
|
||||||
; target s390x TODO: Not yet implemented on s390x
|
; target s390x TODO: Not yet implemented on s390x
|
||||||
target aarch64
|
target aarch64
|
||||||
set enable_simd
|
set enable_simd
|
||||||
target x86_64 machinst
|
target x86_64 machinst
|
||||||
set enable_simd
|
|
||||||
target x86_64 legacy haswell
|
|
||||||
|
|
||||||
function %vselect_i8x16() -> i8x16 {
|
function %vselect_i8x16() -> i8x16 {
|
||||||
block0:
|
block0:
|
||||||
@@ -45,3 +44,31 @@ block0:
|
|||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
; run: %vselect_i64x2() == [200 101]
|
; run: %vselect_i64x2() == [200 101]
|
||||||
|
|
||||||
|
function %vselect_p_i8x16(b8x16, i8x16, i8x16) -> i8x16 {
|
||||||
|
block0(v0: b8x16, v1: i8x16, v2: i8x16):
|
||||||
|
v3 = vselect v0, v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
; run: %vselect_p_i8x16([true false true true true false false false true false true true true false false false], [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16], [17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32]) == [1 18 3 4 5 22 23 24 9 26 11 12 13 30 31 32]
|
||||||
|
|
||||||
|
function %vselect_p_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
|
||||||
|
block0(v0: b16x8, v1: i16x8, v2: i16x8):
|
||||||
|
v3 = vselect v0, v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
; run: %vselect_p_i16x8([true false true true true false false false], [1 2 3 4 5 6 7 8], [17 18 19 20 21 22 23 24]) == [1 18 3 4 5 22 23 24]
|
||||||
|
|
||||||
|
function %vselect_p_i32x4(b32x4, i32x4, i32x4) -> i32x4 {
|
||||||
|
block0(v0: b32x4, v1: i32x4, v2: i32x4):
|
||||||
|
v3 = vselect v0, v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
; run: %vselect_p_i32x4([true false true true], [1 2 3 4], [100000 200000 300000 400000]) == [1 200000 3 4]
|
||||||
|
|
||||||
|
function %vselect_p_i64x2(b64x2, i64x2, i64x2) -> i64x2 {
|
||||||
|
block0(v0: b64x2, v1: i64x2, v2: i64x2):
|
||||||
|
v3 = vselect v0, v1, v2
|
||||||
|
return v3
|
||||||
|
}
|
||||||
|
; run: %vselect_p_i64x2([true false], [1 2], [100000000000 200000000000]) == [1 200000000000]
|
||||||
|
|||||||
@@ -873,10 +873,37 @@ where
|
|||||||
let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
|
let lanes = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
|
||||||
assign(lanes[idx].clone())
|
assign(lanes[idx].clone())
|
||||||
}
|
}
|
||||||
Opcode::VhighBits => unimplemented!("VhighBits"),
|
Opcode::VhighBits => {
|
||||||
|
// `ctrl_ty` controls the return type for this, so the input type
|
||||||
|
// must be retrieved via `inst_context`.
|
||||||
|
let lane_type = inst_context
|
||||||
|
.type_of(inst_context.args()[0])
|
||||||
|
.unwrap()
|
||||||
|
.lane_type();
|
||||||
|
let a = extractlanes(&arg(0)?, lane_type)?;
|
||||||
|
let mut result: i128 = 0;
|
||||||
|
for (i, val) in a.into_iter().enumerate() {
|
||||||
|
let val = val.reverse_bits()?.into_int()?; // MSB -> LSB
|
||||||
|
result |= (val & 1) << i;
|
||||||
|
}
|
||||||
|
assign(Value::int(result, ctrl_ty)?)
|
||||||
|
}
|
||||||
Opcode::Vsplit => unimplemented!("Vsplit"),
|
Opcode::Vsplit => unimplemented!("Vsplit"),
|
||||||
Opcode::Vconcat => unimplemented!("Vconcat"),
|
Opcode::Vconcat => unimplemented!("Vconcat"),
|
||||||
Opcode::Vselect => unimplemented!("Vselect"),
|
Opcode::Vselect => {
|
||||||
|
let c = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
|
||||||
|
let x = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
|
||||||
|
let y = extractlanes(&arg(2)?, ctrl_ty.lane_type())?;
|
||||||
|
let mut new_vec = SimdVec::new();
|
||||||
|
for (c, (x, y)) in c.into_iter().zip(x.into_iter().zip(y.into_iter())) {
|
||||||
|
if Value::eq(&c, &Value::int(0, ctrl_ty.lane_type())?)? {
|
||||||
|
new_vec.push(y);
|
||||||
|
} else {
|
||||||
|
new_vec.push(x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assign(vectorizelanes(&new_vec, ctrl_ty)?)
|
||||||
|
}
|
||||||
Opcode::VanyTrue => assign(fold_vector(
|
Opcode::VanyTrue => assign(fold_vector(
|
||||||
arg(0)?,
|
arg(0)?,
|
||||||
ctrl_ty,
|
ctrl_ty,
|
||||||
|
|||||||
Reference in New Issue
Block a user