[machinst x64]: refactor using added RegMem::from(Writable<Reg>)
This commit is contained in:
@@ -5,7 +5,7 @@ use super::EmitState;
|
|||||||
use crate::ir::condcodes::{FloatCC, IntCC};
|
use crate::ir::condcodes::{FloatCC, IntCC};
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use core::fmt::Debug;
|
use core::fmt::Debug;
|
||||||
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper};
|
use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageCollector, RegUsageMapper, Writable};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::string::{String, ToString};
|
use std::string::{String, ToString};
|
||||||
|
|
||||||
@@ -265,6 +265,12 @@ impl RegMem {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Conversion from a writable register to a `RegMem` operand.
///
/// Lets call sites write `RegMem::from(tmp)` rather than spelling out
/// `RegMem::reg(tmp.to_reg())` by hand.
impl From<Writable<Reg>> for RegMem {
|
||||||
|
/// Builds the operand by unwrapping `r` with `to_reg()` and passing the
/// result to `RegMem::reg`.
fn from(r: Writable<Reg>) -> Self {
|
||||||
|
RegMem::reg(r.to_reg())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl ShowWithRRU for RegMem {
|
impl ShowWithRRU for RegMem {
|
||||||
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||||
self.show_rru_sized(mb_rru, 8)
|
self.show_rru_sized(mb_rru, 8)
|
||||||
|
|||||||
@@ -2685,8 +2685,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
// After loading the constructed mask in a temporary register, we use this to
|
// After loading the constructed mask in a temporary register, we use this to
|
||||||
// shuffle the `dst` register (remember that, in this case, it is the same as
|
// shuffle the `dst` register (remember that, in this case, it is the same as
|
||||||
// `src` so we disregard this register).
|
// `src` so we disregard this register).
|
||||||
let tmp = RegMem::reg(tmp.to_reg());
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, tmp, dst));
|
|
||||||
} else {
|
} else {
|
||||||
// If `lhs` and `rhs` are different, we must shuffle each separately and then OR
|
// If `lhs` and `rhs` are different, we must shuffle each separately and then OR
|
||||||
// them together. This is necessary due to PSHUFB semantics. As in the case above,
|
// them together. This is necessary due to PSHUFB semantics. As in the case above,
|
||||||
@@ -2698,8 +2697,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
|
let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
|
||||||
let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
||||||
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp1, ty));
|
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp1, ty));
|
||||||
let tmp1 = RegMem::reg(tmp1.to_reg());
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp1), tmp0));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, tmp1, tmp0));
|
|
||||||
|
|
||||||
// PSHUFB the second argument, placing zeroes for unused lanes.
|
// PSHUFB the second argument, placing zeroes for unused lanes.
|
||||||
let constructed_mask = mask
|
let constructed_mask = mask
|
||||||
@@ -2709,13 +2707,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
.collect();
|
.collect();
|
||||||
let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
|
||||||
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp2, ty));
|
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp2, ty));
|
||||||
let tmp2 = RegMem::reg(tmp2.to_reg());
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp2), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, tmp2, dst));
|
|
||||||
|
|
||||||
// OR the shuffled registers (the mechanism and lane-size for OR-ing the registers
|
// OR the shuffled registers (the mechanism and lane-size for OR-ing the registers
|
||||||
// is not important).
|
// is not important).
|
||||||
let tmp0 = RegMem::reg(tmp0.to_reg());
|
ctx.emit(Inst::xmm_rm_r(SseOpcode::Orps, RegMem::from(tmp0), dst));
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Orps, tmp0, dst));
|
|
||||||
|
|
||||||
// TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
|
// TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
|
||||||
}
|
}
|
||||||
@@ -2744,13 +2740,19 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(Inst::xmm_load_const_seq(zero_mask_value, zero_mask, ty));
|
ctx.emit(Inst::xmm_load_const_seq(zero_mask_value, zero_mask, ty));
|
||||||
|
|
||||||
// Use the `zero_mask` on a writable `swizzle_mask`.
|
// Use the `zero_mask` on a writable `swizzle_mask`.
|
||||||
let zero_mask = RegMem::reg(zero_mask.to_reg());
|
|
||||||
let swizzle_mask = Writable::from_reg(swizzle_mask);
|
let swizzle_mask = Writable::from_reg(swizzle_mask);
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Paddusb, zero_mask, swizzle_mask));
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
|
SseOpcode::Paddusb,
|
||||||
|
RegMem::from(zero_mask),
|
||||||
|
swizzle_mask,
|
||||||
|
));
|
||||||
|
|
||||||
// Shuffle `dst` using the fixed-up `swizzle_mask`.
|
// Shuffle `dst` using the fixed-up `swizzle_mask`.
|
||||||
let swizzle_mask = RegMem::reg(swizzle_mask.to_reg());
|
ctx.emit(Inst::xmm_rm_r(
|
||||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, swizzle_mask, dst));
|
SseOpcode::Pshufb,
|
||||||
|
RegMem::from(swizzle_mask),
|
||||||
|
dst,
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Insertlane => {
|
Opcode::Insertlane => {
|
||||||
|
|||||||
Reference in New Issue
Block a user