[machinst x64]: implement load*_zero for x64
This commit is contained in:
@@ -4095,6 +4095,53 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
emit_extract_lane(ctx, src, dst, lane, ty);
|
emit_extract_lane(ctx, src, dst, lane, ty);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::ScalarToVector => {
    // When moving a scalar value to a vector register, we must handle several
    // situations:
    // 1. a scalar float is already in an XMM register, so we simply move it
    // 2. a scalar of any other type resides in a GPR register: MOVD moves the bits to an
    //    XMM register and zeroes the upper bits
    // 3. a scalar (float or otherwise) that has previously been loaded from memory (e.g.
    //    the default lowering of Wasm's `load[32|64]_zero`) can be lowered to a single
    //    MOVSS/MOVSD instruction; to do this, we rely on `input_to_reg_mem` to sink the
    //    unused load.
    let src = input_to_reg_mem(ctx, inputs[0]);
    let src_ty = ctx.input_ty(insn, 0);
    let dst = get_output_reg(ctx, outputs[0]);
    let dst_ty = ty.unwrap();
    // The destination must be a 128-bit vector whose lane type matches the scalar source.
    assert!(src_ty == dst_ty.lane_type() && dst_ty.bits() == 128);
    match src {
        RegMem::Reg { reg } => {
            if src_ty.is_float() {
                // Case 1: when moving a scalar float, we simply move from one XMM register
                // to another, expecting the register allocator to elide this. Here we
                // assume that the upper bits of a scalar float have not been munged with
                // (the same assumption the old backend makes).
                ctx.emit(Inst::gen_move(dst, reg, dst_ty));
            } else {
                // Case 2: when moving a scalar value of any other type, use MOVD to zero
                // the upper lanes.
                let src_size = match src_ty.bits() {
                    32 => OperandSize::Size32,
                    64 => OperandSize::Size64,
                    _ => unimplemented!("invalid source size for type: {}", src_ty),
                };
                ctx.emit(Inst::gpr_to_xmm(SseOpcode::Movd, src, src_size, dst));
            }
        }
        RegMem::Mem { .. } => {
            // Case 3: when presented with `load + scalar_to_vector`, coalesce into a single
            // MOVSS/MOVSD instruction.
            let opcode = match src_ty.bits() {
                32 => SseOpcode::Movss,
                64 => SseOpcode::Movsd,
                _ => unimplemented!("unable to move scalar to vector for type: {}", src_ty),
            };
            ctx.emit(Inst::xmm_mov(opcode, src, dst));
        }
    }
}
||||||
Opcode::Splat => {
|
Opcode::Splat => {
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
assert_eq!(ty.bits(), 128);
|
assert_eq!(ty.bits(), 128);
|
||||||
|
|||||||
@@ -91,3 +91,34 @@ block0(v0: f64):
|
|||||||
; check: uninit %xmm1
|
; check: uninit %xmm1
|
||||||
; nextln: movsd %xmm0, %xmm1
|
; nextln: movsd %xmm0, %xmm1
|
||||||
; nextln: movlhps %xmm0, %xmm1
|
; nextln: movlhps %xmm0, %xmm1
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;; load*_zero

; Verify that a `load` followed by a `scalar_to_vector` (the CLIF translation of `load32_zero`) is
; lowered to a single MOVSS instruction.
function %load32_zero_coalesced(i64) -> i32x4 {
block0(v0: i64):
    v1 = load.i32 v0
    v2 = scalar_to_vector.i32x4 v1
    ; check: movss 0(%rdi), %xmm0
    return v2
}
|
||||||
|
|
||||||
|
;; Verify that `scalar_to_vector` (used by `load32_zero`), lowers as expected.
function %load32_zero_int(i32) -> i32x4 {
block0(v0: i32):
    v1 = scalar_to_vector.i32x4 v0
    ; check: movd %edi, %xmm0
    return v1
}
|
||||||
|
; A scalar float is already in an XMM register, so no move instruction should be emitted.
function %load32_zero_float(f32) -> f32x4 {
block0(v0: f32):
    v1 = scalar_to_vector.f32x4 v0
    ; regex: MOV=movap*
    ; check: pushq
    ; not: $MOV
    ; check: ret
    return v1
}
|
||||||
|
|||||||
Reference in New Issue
Block a user