diff --git a/cranelift/filetests/filetests/runtests/simd-umulhi-aarch64.clif b/cranelift/filetests/filetests/runtests/simd-umulhi-aarch64.clif
new file mode 100644
index 0000000000..db94f2f648
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-umulhi-aarch64.clif
@@ -0,0 +1,40 @@
+test interpret
+test run
+target aarch64
+; x86_64 only supports `i16`, `i32`, and `i64`
+
+function %umulhi_i8(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+    v2 = umulhi v0, v1
+    return v2
+}
+; run: %umulhi_i8(2, 4) == 0
+; run: %umulhi_i8(255, 255) == 254
+
+function %umulhi_i8x16(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = umulhi v0, v1
+    return v2
+}
+; run: %umulhi_i8x16([1 2 3 4 5 6 7 8 255 255 255 255 255 255 255 255], [9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 255]) == [0 0 0 0 0 0 0 0 16 17 18 19 20 21 22 254]
+
+function %umulhi_i16x8(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+    v2 = umulhi v0, v1
+    return v2
+}
+; run: %umulhi_i16x8([1 2 255 255 255 255 65535 65535], [3 4 5 6 7 8 9 65535]) == [0 0 0 0 0 0 8 65534]
+
+function %umulhi_i32x4(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = umulhi v0, v1
+    return v2
+}
+; run: %umulhi_i32x4([1 255 65535 4294967295], [2 65535 4294967295 4294967295]) == [0 0 65534 4294967294]
+
+function %umulhi_i64x2(i64x2, i64x2) -> i64x2 {
+block0(v0: i64x2, v1: i64x2):
+    v2 = umulhi v0, v1
+    return v2
+}
+; run: %umulhi_i64x2([1 18446744073709551615], [2 18446744073709551615]) == [0 18446744073709551614]
\ No newline at end of file
diff --git a/cranelift/filetests/filetests/runtests/simd-umulhi.clif b/cranelift/filetests/filetests/runtests/simd-umulhi.clif
new file mode 100644
index 0000000000..fd643151ab
--- /dev/null
+++ b/cranelift/filetests/filetests/runtests/simd-umulhi.clif
@@ -0,0 +1,29 @@
+test interpret
+test run
+target aarch64
+set enable_simd
+target x86_64
+
+function %umulhi_i16(i16, i16) -> i16 {
+block0(v0: i16, v1: i16):
+    v2 = umulhi v0, v1
+    return v2
+}
+; run: %umulhi_i16(2, 4) == 0
+; run: %umulhi_i16(65535, 65535) == 65534
+
+function %umulhi_i32(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = umulhi v0, v1
+    return v2
+}
+; run: %umulhi_i32(500, 700) == 0
+; run: %umulhi_i32(4294967295, 4294967295) == 4294967294
+
+function %umulhi_i64(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = umulhi v0, v1
+    return v2
+}
+; run: %umulhi_i64(4294967295, 4294967295) == 0
+; run: %umulhi_i64(18446744073709551615, 18446744073709551615) == 18446744073709551614
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 497ffb4173..d85a13b7bf 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -516,7 +516,57 @@ where
         Opcode::Ineg => binary(Value::sub, Value::int(0, ctrl_ty)?, arg(0)?)?,
         Opcode::Iabs => unimplemented!("Iabs"),
         Opcode::Imul => binary(Value::mul, arg(0)?, arg(1)?)?,
-        Opcode::Umulhi => unimplemented!("Umulhi"),
+        Opcode::Umulhi => {
+            // Multiply at twice the operand width, then keep the upper half of the product.
+            if ctrl_ty.is_vector() {
+                let double_length = match ctrl_ty.lane_bits() {
+                    8 => types::I16,
+                    16 => types::I32,
+                    32 => types::I64,
+                    64 => types::I128,
+                    // Report the lane width, which is the value matched on above.
+                    _ => unimplemented!("Unsupported integer length {}", ctrl_ty.lane_bits()),
+                };
+                let mut new_vec = SimdVec::new();
+                // NOTE(review): lanes skip the scalar path's `ToUnsigned` conversion; presumably
+                // `extractlanes` already yields zero-extended values -- confirm.
+                let arg0 = extractlanes(&arg(0)?, ctrl_ty.lane_type())?;
+                let arg1 = extractlanes(&arg(1)?, ctrl_ty.lane_type())?;
+                for (x, y) in arg0.into_iter().zip(arg1) {
+                    let x: V = Value::int(x, double_length)?;
+                    let y: V = Value::int(y, double_length)?;
+                    new_vec.push(
+                        Value::mul(x, y)?
+                            .convert(ValueConversionKind::ExtractUpper(ctrl_ty.lane_type()))?
+                            .into_int()?,
+                    )
+                }
+                assign(vectorizelanes(&new_vec, ctrl_ty)?)
+            } else {
+                let double_length = match ctrl_ty.bits() {
+                    8 => types::I16,
+                    16 => types::I32,
+                    32 => types::I64,
+                    64 => types::I128,
+                    _ => unimplemented!("Unsupported integer length {}", ctrl_ty.bits()),
+                };
+                // Reinterpret both operands as unsigned before widening them.
+                let x: V = Value::int(
+                    arg(0)?
+                        .convert(ValueConversionKind::ToUnsigned)?
+                        .into_int()?,
+                    double_length,
+                )?;
+                let y: V = Value::int(
+                    arg(1)?
+                        .convert(ValueConversionKind::ToUnsigned)?
+                        .into_int()?,
+                    double_length,
+                )?;
+                let z = Value::mul(x, y)?.convert(ValueConversionKind::ExtractUpper(ctrl_ty))?;
+                assign(z)
+            }
+        }
         Opcode::Smulhi => unimplemented!("Smulhi"),
         Opcode::Udiv => binary_unsigned_can_trap(Value::div, arg(0)?, arg(1)?)?,
         Opcode::Sdiv => binary_can_trap(Value::div, arg(0)?, arg(1)?)?,
diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs
index 0e81d804a7..023d009dca 100644
--- a/cranelift/interpreter/src/value.rs
+++ b/cranelift/interpreter/src/value.rs
@@ -273,7 +273,8 @@ impl Value for DataValue {
                     kind
                 );
 
-                let shift_amt = 128 - (ty.bytes() * 8);
+                // NOTE(review): assumes the source value is at least as wide as `ty` -- confirm.
+                let shift_amt = (self.ty().bytes() * 8) - (ty.bytes() * 8);
                 let mask = (1 << (ty.bytes() * 8)) - 1i128;
                 let shifted_mask = mask << shift_amt;
 