Fix some 16- and 8-bit behavior in x64 backend related to rotates.
Uncovered by @bjorn3 (thanks!): 8- and 16-bit rotates were not working properly in recent versions of Cranelift with part of the lowering migrated to ISLE. This PR fixes a few issues:

- 8- and 16-bit rotate-left needs to mask a constant amount, if any, because we use a 32-bit rotate instruction and so don't get the appropriate shift-amount masking for free from x86 semantics.
- `operand_size_of_type` was incorrect: it only handled 32- and 64-bit types and silently returned `OperandSize::Size32` for everything else. It now uses the `OperandSize::from_ty(ty)` helper, as the pre-ISLE code did.

Our test coverage for narrow value types is not great; this PR adds some runtests for rotl/rotr, but more would always be better!
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
|
src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
|
||||||
src/prelude.isle d3d2a6a42fb778231a4cdca30995324e1293a9ca8073c5a27a501535759eb51f84a6718322a93dfba4b66ee4f0c9afce7dcec0428516ef0c5bc96e8c8b76925d
|
src/prelude.isle 75a46b97817ad6a4c34e618b81e60876eec6fd1c83ac3ee174851e42045c951644663b2cbc31f1749ce2bc3ad9eb94fb0b877eb2c3bc4885cab7d7e87e9df1d6
|
||||||
src/isa/aarch64/inst.isle 8e4b8e452cf06a368c2e1d930042027a5d3bd690ab46d498d959257e9b4461d17abf244838395cd80da1fe5e2e86fc43855fb5753ca4f1643538c2ae4b3b4a1e
|
src/isa/aarch64/inst.isle 8e4b8e452cf06a368c2e1d930042027a5d3bd690ab46d498d959257e9b4461d17abf244838395cd80da1fe5e2e86fc43855fb5753ca4f1643538c2ae4b3b4a1e
|
||||||
src/isa/aarch64/lower.isle bc3db9c1e6ac186b918cc04f4d26af398f99ec36c8cdc20ec4d02d18dd57dba12e3184fea031b4ac97051c5e194a69666afb5e204807c818e6688c177f9c1b91
|
src/isa/aarch64/lower.isle bc3db9c1e6ac186b918cc04f4d26af398f99ec36c8cdc20ec4d02d18dd57dba12e3184fea031b4ac97051c5e194a69666afb5e204807c818e6688c177f9c1b91
|
||||||
|
|||||||
@@ -34,6 +34,8 @@ pub trait Context {
|
|||||||
fn ty_bits_u16(&mut self, arg0: Type) -> u16;
|
fn ty_bits_u16(&mut self, arg0: Type) -> u16;
|
||||||
fn fits_in_32(&mut self, arg0: Type) -> Option<Type>;
|
fn fits_in_32(&mut self, arg0: Type) -> Option<Type>;
|
||||||
fn fits_in_64(&mut self, arg0: Type) -> Option<Type>;
|
fn fits_in_64(&mut self, arg0: Type) -> Option<Type>;
|
||||||
|
fn ty_32_or_64(&mut self, arg0: Type) -> Option<Type>;
|
||||||
|
fn ty_8_or_16(&mut self, arg0: Type) -> Option<Type>;
|
||||||
fn vec128(&mut self, arg0: Type) -> Option<Type>;
|
fn vec128(&mut self, arg0: Type) -> Option<Type>;
|
||||||
fn not_i64x2(&mut self, arg0: Type) -> Option<()>;
|
fn not_i64x2(&mut self, arg0: Type) -> Option<()>;
|
||||||
fn value_list_slice(&mut self, arg0: ValueList) -> ValueSlice;
|
fn value_list_slice(&mut self, arg0: ValueList) -> ValueSlice;
|
||||||
@@ -49,6 +51,7 @@ pub trait Context {
|
|||||||
fn first_result(&mut self, arg0: Inst) -> Option<Value>;
|
fn first_result(&mut self, arg0: Inst) -> Option<Value>;
|
||||||
fn inst_data(&mut self, arg0: Inst) -> InstructionData;
|
fn inst_data(&mut self, arg0: Inst) -> InstructionData;
|
||||||
fn value_type(&mut self, arg0: Value) -> Type;
|
fn value_type(&mut self, arg0: Value) -> Type;
|
||||||
|
fn ty_bits_mask(&mut self, arg0: Type) -> u64;
|
||||||
fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>;
|
fn multi_lane(&mut self, arg0: Type) -> Option<(u8, u16)>;
|
||||||
fn def_inst(&mut self, arg0: Value) -> Option<Inst>;
|
fn def_inst(&mut self, arg0: Value) -> Option<Inst>;
|
||||||
fn trap_code_division_by_zero(&mut self) -> TrapCode;
|
fn trap_code_division_by_zero(&mut self) -> TrapCode;
|
||||||
@@ -79,13 +82,13 @@ pub trait Context {
|
|||||||
fn safe_divisor_from_imm64(&mut self, arg0: Imm64) -> Option<u64>;
|
fn safe_divisor_from_imm64(&mut self, arg0: Imm64) -> Option<u64>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Internal type ProducesFlags: defined at src/prelude.isle line 246.
|
/// Internal type ProducesFlags: defined at src/prelude.isle line 259.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum ProducesFlags {
|
pub enum ProducesFlags {
|
||||||
ProducesFlags { inst: MInst, result: Reg },
|
ProducesFlags { inst: MInst, result: Reg },
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Internal type ConsumesFlags: defined at src/prelude.isle line 249.
|
/// Internal type ConsumesFlags: defined at src/prelude.isle line 262.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum ConsumesFlags {
|
pub enum ConsumesFlags {
|
||||||
ConsumesFlags { inst: MInst, result: Reg },
|
ConsumesFlags { inst: MInst, result: Reg },
|
||||||
@@ -999,7 +1002,7 @@ pub fn constructor_with_flags<C: Context>(
|
|||||||
result: pattern3_1,
|
result: pattern3_1,
|
||||||
} = pattern2_0
|
} = pattern2_0
|
||||||
{
|
{
|
||||||
// Rule at src/prelude.isle line 259.
|
// Rule at src/prelude.isle line 272.
|
||||||
let expr0_0 = C::emit(ctx, &pattern1_0);
|
let expr0_0 = C::emit(ctx, &pattern1_0);
|
||||||
let expr1_0 = C::emit(ctx, &pattern3_0);
|
let expr1_0 = C::emit(ctx, &pattern3_0);
|
||||||
let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
|
let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1);
|
||||||
@@ -1027,7 +1030,7 @@ pub fn constructor_with_flags_1<C: Context>(
|
|||||||
result: pattern3_1,
|
result: pattern3_1,
|
||||||
} = pattern2_0
|
} = pattern2_0
|
||||||
{
|
{
|
||||||
// Rule at src/prelude.isle line 267.
|
// Rule at src/prelude.isle line 280.
|
||||||
let expr0_0 = C::emit(ctx, &pattern1_0);
|
let expr0_0 = C::emit(ctx, &pattern1_0);
|
||||||
let expr1_0 = C::emit(ctx, &pattern3_0);
|
let expr1_0 = C::emit(ctx, &pattern3_0);
|
||||||
return Some(pattern3_1);
|
return Some(pattern3_1);
|
||||||
@@ -1061,7 +1064,7 @@ pub fn constructor_with_flags_2<C: Context>(
|
|||||||
result: pattern5_1,
|
result: pattern5_1,
|
||||||
} = pattern4_0
|
} = pattern4_0
|
||||||
{
|
{
|
||||||
// Rule at src/prelude.isle line 277.
|
// Rule at src/prelude.isle line 290.
|
||||||
let expr0_0 = C::emit(ctx, &pattern1_0);
|
let expr0_0 = C::emit(ctx, &pattern1_0);
|
||||||
let expr1_0 = C::emit(ctx, &pattern3_0);
|
let expr1_0 = C::emit(ctx, &pattern3_0);
|
||||||
let expr2_0 = C::emit(ctx, &pattern5_0);
|
let expr2_0 = C::emit(ctx, &pattern5_0);
|
||||||
|
|||||||
@@ -379,6 +379,10 @@
|
|||||||
(decl imm8_from_value (Imm8Reg) Value)
|
(decl imm8_from_value (Imm8Reg) Value)
|
||||||
(extern extractor imm8_from_value imm8_from_value)
|
(extern extractor imm8_from_value imm8_from_value)
|
||||||
|
|
||||||
|
;; Mask an `Imm8Reg.Imm8`.
|
||||||
|
(decl mask_imm8_const (Imm8Reg u64) Imm8Reg)
|
||||||
|
(extern constructor mask_imm8_const mask_imm8_const)
|
||||||
|
|
||||||
;; Extract a constant `RegMemImm.Imm` from a value operand.
|
;; Extract a constant `RegMemImm.Imm` from a value operand.
|
||||||
(decl simm32_from_value (RegMemImm) Value)
|
(decl simm32_from_value (RegMemImm) Value)
|
||||||
(extern extractor simm32_from_value simm32_from_value)
|
(extern extractor simm32_from_value simm32_from_value)
|
||||||
|
|||||||
@@ -633,15 +633,26 @@
|
|||||||
|
|
||||||
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; `i64` and smaller.
|
;; `i16` and `i8`: we need to extend the shift amount, or mask the
|
||||||
|
;; constant.
|
||||||
|
|
||||||
(rule (lower (has_type (fits_in_64 ty) (rotl src amt)))
|
(rule (lower (has_type (ty_8_or_16 ty) (rotl src amt)))
|
||||||
|
(let ((amt_ Reg (extend_to_reg amt $I32 (ExtendKind.Zero))))
|
||||||
|
(value_reg (m_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
|
||||||
|
|
||||||
|
(rule (lower (has_type (ty_8_or_16 ty) (rotl src (imm8_from_value amt))))
|
||||||
|
(value_reg (m_rotl ty (put_in_reg src) (mask_imm8_const amt (ty_bits_mask ty)))))
|
||||||
|
|
||||||
|
;; `i64` and `i32`: we can rely on x86's rotate-amount masking since
|
||||||
|
;; we operate on the whole register.
|
||||||
|
|
||||||
|
(rule (lower (has_type (ty_32_or_64 ty) (rotl src amt)))
|
||||||
;; NB: Only the low bits of `amt` matter since we logically mask the
|
;; NB: Only the low bits of `amt` matter since we logically mask the
|
||||||
;; shift amount to the value's bit width.
|
;; shift amount to the value's bit width.
|
||||||
(let ((amt_ Reg (lo_reg amt)))
|
(let ((amt_ Reg (lo_reg amt)))
|
||||||
(value_reg (m_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
|
(value_reg (m_rotl ty (put_in_reg src) (Imm8Reg.Reg amt_)))))
|
||||||
|
|
||||||
(rule (lower (has_type (fits_in_64 ty) (rotl src (imm8_from_value amt))))
|
(rule (lower (has_type (ty_32_or_64 ty) (rotl src (imm8_from_value amt))))
|
||||||
(value_reg (m_rotl ty (put_in_reg src) amt)))
|
(value_reg (m_rotl ty (put_in_reg src) amt)))
|
||||||
|
|
||||||
;; `i128`.
|
;; `i128`.
|
||||||
|
|||||||
@@ -57,11 +57,7 @@ where
|
|||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn operand_size_of_type(&mut self, ty: Type) -> OperandSize {
|
fn operand_size_of_type(&mut self, ty: Type) -> OperandSize {
|
||||||
if ty.bits() == 64 {
|
OperandSize::from_ty(ty)
|
||||||
OperandSize::Size64
|
|
||||||
} else {
|
|
||||||
OperandSize::Size32
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
|
fn put_in_reg_mem(&mut self, val: Value) -> RegMem {
|
||||||
@@ -125,6 +121,16 @@ where
|
|||||||
Some(Imm8Reg::Imm8 { imm })
|
Some(Imm8Reg::Imm8 { imm })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn mask_imm8_const(&mut self, imm8: &Imm8Reg, mask: u64) -> Imm8Reg {
|
||||||
|
match imm8 {
|
||||||
|
&Imm8Reg::Reg { reg } => Imm8Reg::Reg { reg },
|
||||||
|
&Imm8Reg::Imm8 { imm } => Imm8Reg::Imm8 {
|
||||||
|
imm: imm & (mask as u8),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn simm32_from_value(&mut self, val: Value) -> Option<RegMemImm> {
|
fn simm32_from_value(&mut self, val: Value) -> Option<RegMemImm> {
|
||||||
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
|
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
|
src/clif.isle be1359b4b6b153f378517c1dd95cd80f4a6bed0c7b86eaba11c088fd71b7bfe80a3c868ace245b2da0bfbbd6ded262ea9576c8e0eeacbf89d03c34a17a709602
|
||||||
src/prelude.isle d3d2a6a42fb778231a4cdca30995324e1293a9ca8073c5a27a501535759eb51f84a6718322a93dfba4b66ee4f0c9afce7dcec0428516ef0c5bc96e8c8b76925d
|
src/prelude.isle 75a46b97817ad6a4c34e618b81e60876eec6fd1c83ac3ee174851e42045c951644663b2cbc31f1749ce2bc3ad9eb94fb0b877eb2c3bc4885cab7d7e87e9df1d6
|
||||||
src/isa/x64/inst.isle b151120df3c356ac697122a8557becd8857eb725851506e844edeb85d831d461322a96d280ad84f9a23518e1e4efb607aebc0e249004148675e4cc19e89f0655
|
src/isa/x64/inst.isle 3b9c5c81e40b4de04169ac10e5b57d8de14dfefb104e565d24a303e6ccf28416acbdf585b1cae5a90c7e37310d7cdc1534054202597a7b8d2181c8eece08c29e
|
||||||
src/isa/x64/lower.isle c9b408df0a089fb4f207838973ac775b0f9b56c86f056867c28e6bae317873d3844f74f713f9acd6fed98d3d11a2f9d19d392fe5049169dad33b1fc703b9b766
|
src/isa/x64/lower.isle c7943201b32e9eb9726466e8cc417f7e84c4c4052de31e05ab6e0ad7502a587cf1d7d9835703c4ff5a506390f7a0668741e7f3feaa1edda6396571a425949fc9
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -95,6 +95,11 @@ macro_rules! isle_prelude_methods {
|
|||||||
ty.bits().try_into().unwrap()
|
ty.bits().try_into().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn ty_bits_mask(&mut self, ty: Type) -> u64 {
|
||||||
|
(1 << (self.ty_bits(ty) as u64)) - 1
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn ty_bits_u16(&mut self, ty: Type) -> u16 {
|
fn ty_bits_u16(&mut self, ty: Type) -> u16 {
|
||||||
ty.bits()
|
ty.bits()
|
||||||
@@ -118,6 +123,24 @@ macro_rules! isle_prelude_methods {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn ty_32_or_64(&mut self, ty: Type) -> Option<Type> {
|
||||||
|
if ty.bits() == 32 || ty.bits() == 64 {
|
||||||
|
Some(ty)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn ty_8_or_16(&mut self, ty: Type) -> Option<Type> {
|
||||||
|
if ty.bits() == 8 || ty.bits() == 16 {
|
||||||
|
Some(ty)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn vec128(&mut self, ty: Type) -> Option<Type> {
|
fn vec128(&mut self, ty: Type) -> Option<Type> {
|
||||||
if ty.is_vector() && ty.bits() == 128 {
|
if ty.is_vector() && ty.bits() == 128 {
|
||||||
Some(ty)
|
Some(ty)
|
||||||
|
|||||||
@@ -156,6 +156,14 @@
|
|||||||
(decl fits_in_64 (Type) Type)
|
(decl fits_in_64 (Type) Type)
|
||||||
(extern extractor fits_in_64 fits_in_64)
|
(extern extractor fits_in_64 fits_in_64)
|
||||||
|
|
||||||
|
;; An extractor that matches 32- and 64-bit types only.
|
||||||
|
(decl ty_32_or_64 (Type) Type)
|
||||||
|
(extern extractor ty_32_or_64 ty_32_or_64)
|
||||||
|
|
||||||
|
;; An extractor that matches 8- and 16-bit types only.
|
||||||
|
(decl ty_8_or_16 (Type) Type)
|
||||||
|
(extern extractor ty_8_or_16 ty_8_or_16)
|
||||||
|
|
||||||
;; An extractor that only matches 128-bit vector types.
|
;; An extractor that only matches 128-bit vector types.
|
||||||
(decl vec128 (Type) Type)
|
(decl vec128 (Type) Type)
|
||||||
(extern extractor vec128 vec128)
|
(extern extractor vec128 vec128)
|
||||||
@@ -230,6 +238,11 @@
|
|||||||
(and (result_type ty)
|
(and (result_type ty)
|
||||||
inst))
|
inst))
|
||||||
|
|
||||||
|
;; Return a bitmask that will mask off a count to be within `ty`'s
|
||||||
|
;; bit-width. Used for shifts/rotates.
|
||||||
|
(decl ty_bits_mask (Type) u64)
|
||||||
|
(extern constructor ty_bits_mask ty_bits_mask)
|
||||||
|
|
||||||
;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given
|
;; Match a multi-lane type, extracting (# bits per lane, # lanes) from the given
|
||||||
;; type. Will only match when there is more than one lane.
|
;; type. Will only match when there is more than one lane.
|
||||||
(decl multi_lane (u8 u16) Type)
|
(decl multi_lane (u8 u16) Type)
|
||||||
|
|||||||
@@ -318,3 +318,25 @@ block0(v0: i32, v1: i8):
|
|||||||
; run: %sshr_i32_i8(0x40000000, 32) == 0x40000000
|
; run: %sshr_i32_i8(0x40000000, 32) == 0x40000000
|
||||||
; run: %sshr_i32_i8(0x40000000, 33) == 0x20000000
|
; run: %sshr_i32_i8(0x40000000, 33) == 0x20000000
|
||||||
; run: %sshr_i32_i8(0x40000000, 34) == 0x10000000
|
; run: %sshr_i32_i8(0x40000000, 34) == 0x10000000
|
||||||
|
|
||||||
|
function %rotl_i8_const_37(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = iconst.i8 37
|
||||||
|
v2 = rotl.i8 v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; run: %rotl_i8_const_37(0x00) == 0x00
|
||||||
|
; run: %rotl_i8_const_37(0x01) == 0x20
|
||||||
|
; run: %rotl_i8_const_37(0x12) == 0x42
|
||||||
|
|
||||||
|
function %rotr_i8_const_37(i8) -> i8 {
|
||||||
|
block0(v0: i8):
|
||||||
|
v1 = iconst.i8 37
|
||||||
|
v2 = rotr.i8 v0, v1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
; run: %rotr_i8_const_37(0x00) == 0x00
|
||||||
|
; run: %rotr_i8_const_37(0x01) == 0x08
|
||||||
|
; run: %rotr_i8_const_37(0x12) == 0x90
|
||||||
|
|||||||
Reference in New Issue
Block a user