Avoid extra register movement when lowering the x86 scalar_to_vector of a float value
This commit is contained in:
@@ -264,6 +264,27 @@ impl LaneType {
|
|||||||
ValueType::Vector(VectorType::new(*self, lanes.into()))
|
ValueType::Vector(VectorType::new(*self, lanes.into()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_float(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
LaneType::FloatType(_) => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_int(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
LaneType::IntType(_) => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_bool(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
LaneType::BoolType(_) => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for LaneType {
|
impl fmt::Display for LaneType {
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ use std::collections::HashMap;
|
|||||||
|
|
||||||
use crate::cdsl::encodings::{Encoding, EncodingBuilder};
|
use crate::cdsl::encodings::{Encoding, EncodingBuilder};
|
||||||
use crate::cdsl::instructions::{
|
use crate::cdsl::instructions::{
|
||||||
BoundInstruction, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
|
InstSpec, Instruction, InstructionGroup, InstructionPredicate, InstructionPredicateNode,
|
||||||
InstructionPredicateNode, InstructionPredicateRegistry,
|
InstructionPredicateRegistry,
|
||||||
};
|
};
|
||||||
use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
|
use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
|
||||||
use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
|
use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
|
||||||
@@ -279,6 +279,17 @@ impl PerCpuModeEncodings {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add the same encoding/recipe pairing to both X86_32 and X86_64
|
||||||
|
fn enc_32_64_rec(
|
||||||
|
&mut self,
|
||||||
|
inst: impl Clone + Into<InstSpec>,
|
||||||
|
recipe: &EncodingRecipe,
|
||||||
|
bits: u16,
|
||||||
|
) {
|
||||||
|
self.enc32_rec(inst.clone(), recipe, bits);
|
||||||
|
self.enc64_rec(inst, recipe, bits);
|
||||||
|
}
|
||||||
|
|
||||||
/// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand
|
/// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand
|
||||||
/// binding) has already happened.
|
/// binding) has already happened.
|
||||||
fn enc_32_64_maybe_isap(
|
fn enc_32_64_maybe_isap(
|
||||||
@@ -1761,6 +1772,9 @@ pub(crate) fn define(
|
|||||||
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
|
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
|
||||||
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size);
|
let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size);
|
||||||
|
if ty.is_float() {
|
||||||
|
e.enc_32_64_rec(instruction, rec_null_fpr, 0);
|
||||||
|
} else {
|
||||||
let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
|
let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
|
||||||
if ty.lane_bits() < 64 {
|
if ty.lane_bits() < 64 {
|
||||||
// no 32-bit encodings for 64-bit widths
|
// no 32-bit encodings for 64-bit widths
|
||||||
@@ -1768,6 +1782,7 @@ pub(crate) fn define(
|
|||||||
}
|
}
|
||||||
e.enc_x86_64(instruction, template);
|
e.enc_x86_64(instruction, template);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// SIMD insertlane
|
// SIMD insertlane
|
||||||
let mut insertlane_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
|
let mut insertlane_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
|
||||||
@@ -1811,37 +1826,34 @@ pub(crate) fn define(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// helper for generating null encodings for FPRs on both 32- and 64-bit architectures
|
|
||||||
let mut null_encode_32_64 = |instruction: BoundInstruction| {
|
|
||||||
e.enc32_rec(instruction.clone(), rec_null_fpr, 0);
|
|
||||||
e.enc64_rec(instruction, rec_null_fpr, 0);
|
|
||||||
};
|
|
||||||
|
|
||||||
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8)
|
// SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8)
|
||||||
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
|
for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
for to_type in
|
for to_type in
|
||||||
ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
|
ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
|
||||||
{
|
{
|
||||||
null_encode_32_64(
|
let instruction = raw_bitcast
|
||||||
raw_bitcast
|
|
||||||
.bind_vector_from_lane(to_type, sse_vector_size)
|
.bind_vector_from_lane(to_type, sse_vector_size)
|
||||||
.bind_vector_from_lane(from_type, sse_vector_size),
|
.bind_vector_from_lane(from_type, sse_vector_size);
|
||||||
);
|
e.enc_32_64_rec(instruction, rec_null_fpr, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an XMM register
|
// SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an XMM register
|
||||||
for float_type in &[F32, F64] {
|
for float_type in &[F32, F64] {
|
||||||
for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
|
for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
|
||||||
null_encode_32_64(
|
e.enc_32_64_rec(
|
||||||
raw_bitcast
|
raw_bitcast
|
||||||
.bind_vector_from_lane(lane_type, sse_vector_size)
|
.bind_vector_from_lane(lane_type, sse_vector_size)
|
||||||
.bind(*float_type),
|
.bind(*float_type),
|
||||||
|
rec_null_fpr,
|
||||||
|
0,
|
||||||
);
|
);
|
||||||
null_encode_32_64(
|
e.enc_32_64_rec(
|
||||||
raw_bitcast
|
raw_bitcast
|
||||||
.bind(*float_type)
|
.bind(*float_type)
|
||||||
.bind_vector_from_lane(lane_type, sse_vector_size),
|
.bind_vector_from_lane(lane_type, sse_vector_size),
|
||||||
|
rec_null_fpr,
|
||||||
|
0,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,8 +24,7 @@ ebb0:
|
|||||||
v0 = f32const 0x42.42
|
v0 = f32const 0x42.42
|
||||||
v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42]
|
v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42]
|
||||||
v2 = extractlane v1, 3
|
v2 = extractlane v1, 3
|
||||||
v10 = f32const 0x42.42 ; TODO this should not be necessary, v0 should be re-usable
|
v3 = fcmp eq v2, v0
|
||||||
v3 = fcmp eq v2, v10
|
|
||||||
return v3
|
return v3
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|||||||
@@ -17,10 +17,10 @@ ebb0:
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
function %test_scalar_to_vector_f32() {
|
function %test_scalar_to_vector_b32() {
|
||||||
ebb0:
|
ebb0:
|
||||||
[-, %rcx] v0 = f32const 0x0.42
|
[-, %rcx] v0 = bconst.b32 false
|
||||||
[-, %xmm3] v1 = scalar_to_vector.f32x4 v0 ; bin: 66 0f 6e d9
|
[-, %xmm3] v1 = scalar_to_vector.b32x4 v0 ; bin: 66 0f 6e d9
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
test compile
|
||||||
|
set opt_level=best
|
||||||
|
set probestack_enabled=false
|
||||||
|
set enable_simd
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
; ensure that scalar_to_vector emits no instructions for floats (they already reside in an XMM register)
|
||||||
|
function %test_scalar_to_vector_f32() -> f32x4 baldrdash_system_v {
|
||||||
|
ebb0:
|
||||||
|
v0 = f32const 0x0.42
|
||||||
|
v1 = scalar_to_vector.f32x4 v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
|
||||||
|
; check: ebb0
|
||||||
|
; nextln: v2 = iconst.i32 0x3e84_0000
|
||||||
|
; nextln: v0 = bitcast.f32 v2
|
||||||
|
; nextln: [null_fpr#00,%xmm0] v1 = scalar_to_vector.f32x4 v0
|
||||||
|
; nextln: return v1
|
||||||
Reference in New Issue
Block a user