Bind constant vectors to vconst; fixes #1052 (#1217)

This commit is contained in:
Andrew Brown
2019-11-12 15:57:59 -08:00
committed by Dan Gohman
parent 9080a02e10
commit 04db2a9f39
2 changed files with 9 additions and 14 deletions

View File

@@ -2,7 +2,6 @@ use crate::cdsl::ast::{constant, var, ExprBuilder, Literal};
use crate::cdsl::instructions::{vector, Bindable, InstructionGroup}; use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
use crate::cdsl::types::{LaneType, ValueType}; use crate::cdsl::types::{LaneType, ValueType};
use crate::cdsl::xform::TransformGroupBuilder; use crate::cdsl::xform::TransformGroupBuilder;
use crate::shared::types::Float::F64;
use crate::shared::types::Int::{I16, I32, I64, I8}; use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::Definitions as SharedDefinitions; use crate::shared::Definitions as SharedDefinitions;
@@ -30,7 +29,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
let clz = insts.by_name("clz"); let clz = insts.by_name("clz");
let ctz = insts.by_name("ctz"); let ctz = insts.by_name("ctz");
let extractlane = insts.by_name("extractlane"); let extractlane = insts.by_name("extractlane");
let f64const = insts.by_name("f64const");
let fcmp = insts.by_name("fcmp"); let fcmp = insts.by_name("fcmp");
let fcvt_from_uint = insts.by_name("fcvt_from_uint"); let fcvt_from_uint = insts.by_name("fcvt_from_uint");
let fcvt_to_sint = insts.by_name("fcvt_to_sint"); let fcvt_to_sint = insts.by_name("fcvt_to_sint");
@@ -332,7 +330,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
// SIMD // SIMD
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00); let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
let uimm8_one = Literal::constant(&imm.uimm8, 0x01); let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
let ieee64_zero = Literal::constant(&imm.ieee64, 0x00); let u128_zeroes = constant(vec![0x00; 16]);
let b = var("b"); let b = var("b");
let c = var("c"); let c = var("c");
let d = var("d"); let d = var("d");
@@ -344,15 +342,14 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
// SIMD splat: 8-bits // SIMD splat: 8-bits
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
let splat_any8x16 = splat.bind(vector(ty, sse_vector_size)); let splat_any8x16 = splat.bind(vector(ty, sse_vector_size));
let bitcast_f64_to_any8x16 = raw_bitcast.bind(vector(ty, sse_vector_size)).bind(F64);
narrow.legalize( narrow.legalize(
def!(y = splat_any8x16(x)), def!(y = splat_any8x16(x)),
vec![ vec![
def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register
// TODO replace the following two instructions with `vconst(0)` when this is possible; see https://github.com/bytecodealliance/cranelift/issues/1052 def!(b = vconst(u128_zeroes)), // zero out a different XMM register; the shuffle mask
def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0 // for moving the lowest byte to all other byte lanes is 0x0
def!(c = bitcast_f64_to_any8x16(b)), // no instruction emitted; informs the SSA that the 0 in b can be used as a vector of this type def!(y = x86_pshufb(a, b)), // PSHUFB takes two XMM operands, one of which is a
def!(y = x86_pshufb(a, c)), // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b) // shuffle mask (i.e. b)
], ],
); );
} }
@@ -466,7 +463,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
} }
// SIMD vall_true // SIMD vall_true
let zeroes = constant(vec![0x00; 16]);
let eq = Literal::enumerator_for(&imm.intcc, "eq"); let eq = Literal::enumerator_for(&imm.intcc, "eq");
for ty in ValueType::all_lane_types().filter(allowed_simd_type) { for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let vall_true = vall_true.bind(vector(ty, sse_vector_size)); let vall_true = vall_true.bind(vector(ty, sse_vector_size));
@@ -475,7 +471,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
narrow.legalize( narrow.legalize(
def!(y = vall_true(x)), def!(y = vall_true(x)),
vec![ vec![
def!(a = vconst(zeroes)), def!(a = vconst(u128_zeroes)),
def!(c = icmp(eq, x, a)), def!(c = icmp(eq, x, a)),
def!(d = x86_ptest(c, c)), def!(d = x86_ptest(c, c)),
def!(y = trueif(eq, d)), def!(y = trueif(eq, d)),
@@ -489,7 +485,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
narrow.legalize( narrow.legalize(
def!(y = vall_true(x)), def!(y = vall_true(x)),
vec![ vec![
def!(a = vconst(zeroes)), def!(a = vconst(u128_zeroes)),
def!(b = raw_bitcast_to_int(x)), def!(b = raw_bitcast_to_int(x)),
def!(c = icmp(eq, b, a)), def!(c = icmp(eq, b, a)),
def!(d = x86_ptest(c, c)), def!(d = x86_ptest(c, c)),

View File

@@ -67,7 +67,6 @@ ebb0:
; nextln: v2 = iconst.i32 42 ; nextln: v2 = iconst.i32 42
; nextln: v0 = ireduce.i8 v2 ; nextln: v0 = ireduce.i8 v2
; nextln: v3 = scalar_to_vector.i8x16 v0 ; nextln: v3 = scalar_to_vector.i8x16 v0
; nextln: v4 = f64const 0.0 ; nextln: v4 = vconst.i8x16 0x00
; nextln: v5 = raw_bitcast.i8x16 v4 ; nextln: v1 = x86_pshufb v3, v4
; nextln: v1 = x86_pshufb v3, v5
; nextln: return v1 ; nextln: return v1