Introduce the Cranelift IR instruction LoadSplat

It corresponds to WebAssembly's `load*_splat` operations, which
were previously represented as a combination of `Load` and `Splat`
instructions. However, there are architectures such as Armv8-A
that have a single machine instruction equivalent to the Wasm
operations. Generating that instruction from the old representation
would require merging the `Load` and the `Splat` in the backend,
which is not possible because the load may have side effects. The
new IR instruction works around this limitation.

The AArch64 backend leverages the new instruction to improve code
generation.

Copyright (c) 2020, Arm Limited.
This commit is contained in:
Anton Kirilov
2020-10-07 11:29:55 +01:00
parent e659d5cecd
commit e0b911a4df
9 changed files with 237 additions and 75 deletions

View File

@@ -1380,19 +1380,17 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::V128Load16Splat { memarg }
| Operator::V128Load32Splat { memarg }
| Operator::V128Load64Splat { memarg } => {
// TODO: For spec compliance, this is initially implemented as a combination of `load +
// splat` but could be implemented eventually as a single instruction (`load_splat`).
// See https://github.com/bytecodealliance/wasmtime/issues/1175.
translate_load(
let opcode = ir::Opcode::LoadSplat;
let result_ty = type_of(op);
let (flags, base, offset) = prepare_load(
memarg,
ir::Opcode::Load,
type_of(op).lane_type(),
mem_op_size(opcode, result_ty.lane_type()),
builder,
state,
environ,
)?;
let splatted = builder.ins().splat(type_of(op), state.pop1());
state.push1(splatted)
let (load, dfg) = builder.ins().Load(opcode, result_ty, flags, offset, base);
state.push1(dfg.first_result(load))
}
Operator::I8x16ExtractLaneS { lane } | Operator::I16x8ExtractLaneS { lane } => {
let vector = pop1_with_bitcast(state, type_of(op), builder);
@@ -2040,7 +2038,7 @@ fn mem_op_size(opcode: ir::Opcode, ty: Type) -> u32 {
ir::Opcode::Istore8 | ir::Opcode::Sload8 | ir::Opcode::Uload8 => 1,
ir::Opcode::Istore16 | ir::Opcode::Sload16 | ir::Opcode::Uload16 => 2,
ir::Opcode::Istore32 | ir::Opcode::Sload32 | ir::Opcode::Uload32 => 4,
ir::Opcode::Store | ir::Opcode::Load => ty.bytes(),
ir::Opcode::Store | ir::Opcode::Load | ir::Opcode::LoadSplat => ty.bytes(),
_ => panic!("unknown size of mem op for {:?}", opcode),
}
}