s390x: Add z14 support
* Add support for processor features (including auto-detection).
* Move base architecture set requirement back to z14.
* Add z15 feature sets and re-enable z15-specific code generation when required features are available.
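The z15-only instructions are now selected per lowering rule based on the detected feature set (`has_mie2`, `has_vxrs_ext2`), with a z14 fallback sequence emitted otherwise. A minimal sketch of that gating pattern, using simplified stand-in types rather than the real Cranelift `LowerCtx`/`Inst` machinery (only the flag name `has_mie2` is taken from the diff; everything else is illustrative):

// Stand-in types for illustration only; not the actual Cranelift definitions.
#[derive(Debug)]
enum Inst {
    OrNot,          // single z15 (MIE2) or-with-complement instruction
    Or,             // base z14 or
    InvertXor(u64), // explicit invert via XOR immediate, used on z14
}

struct IsaFlags {
    mie2: bool, // set when the MIE2 facility was requested or auto-detected
}

impl IsaFlags {
    fn has_mie2(&self) -> bool {
        self.mie2
    }
}

// Emit either the one-instruction z15 form or the two-instruction z14 fallback.
fn lower_bor_not(isa_flags: &IsaFlags, out: &mut Vec<Inst>) {
    if isa_flags.has_mie2() {
        out.push(Inst::OrNot);
    } else {
        out.push(Inst::Or);
        out.push(Inst::InvertXor(u64::MAX));
    }
}

fn main() {
    let mut seq = Vec::new();
    lower_bor_not(&IsaFlags { mie2: false }, &mut seq);
    println!("{:?}", seq); // [Or, InvertXor(18446744073709551615)]
}

The same shape repeats throughout the diff below: `lower_bnot` and `lower_bitcast` provide the z14 fallback sequences, and each opcode checks `isa_flags.has_mie2()` or `isa_flags.has_vxrs_ext2()` before emitting a z15 instruction.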
@@ -5,6 +5,7 @@ use crate::ir::Inst as IRInst;
 use crate::ir::{types, Endianness, InstructionData, MemFlags, Opcode, TrapCode, Type};
 use crate::isa::s390x::abi::*;
 use crate::isa::s390x::inst::*;
+use crate::isa::s390x::settings as s390x_settings;
 use crate::isa::s390x::S390xBackend;
 use crate::machinst::lower::*;
 use crate::machinst::*;
@@ -548,6 +549,70 @@ fn lower_constant_f64<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, val
     ctx.emit(Inst::load_fp_constant64(rd, value));
 }
 
+//============================================================================
+// Lowering: miscellaneous helpers.
+
+/// Emit code to invert the value of type ty in register rd.
+fn lower_bnot<C: LowerCtx<I = Inst>>(ctx: &mut C, ty: Type, rd: Writable<Reg>) {
+    let alu_op = choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64);
+    ctx.emit(Inst::AluRUImm32Shifted {
+        alu_op,
+        rd,
+        imm: UImm32Shifted::maybe_from_u64(0xffff_ffff).unwrap(),
+    });
+    if ty_bits(ty) > 32 {
+        ctx.emit(Inst::AluRUImm32Shifted {
+            alu_op,
+            rd,
+            imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(),
+        });
+    }
+}
+
+/// Emit code to bitcast between integer and floating-point values.
+fn lower_bitcast<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    rd: Writable<Reg>,
+    output_ty: Type,
+    rn: Reg,
+    input_ty: Type,
+) {
+    match (input_ty, output_ty) {
+        (types::I64, types::F64) => {
+            ctx.emit(Inst::MovToFpr { rd, rn });
+        }
+        (types::F64, types::I64) => {
+            ctx.emit(Inst::MovFromFpr { rd, rn });
+        }
+        (types::I32, types::F32) => {
+            let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+            ctx.emit(Inst::ShiftRR {
+                shift_op: ShiftOp::LShL64,
+                rd: tmp,
+                rn,
+                shift_imm: SImm20::maybe_from_i64(32).unwrap(),
+                shift_reg: None,
+            });
+            ctx.emit(Inst::MovToFpr {
+                rd,
+                rn: tmp.to_reg(),
+            });
+        }
+        (types::F32, types::I32) => {
+            let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+            ctx.emit(Inst::MovFromFpr { rd: tmp, rn });
+            ctx.emit(Inst::ShiftRR {
+                shift_op: ShiftOp::LShR64,
+                rd,
+                rn: tmp.to_reg(),
+                shift_imm: SImm20::maybe_from_i64(32).unwrap(),
+                shift_reg: None,
+            });
+        }
+        _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty),
+    }
+}
+
 //=============================================================================
 // Lowering: comparisons
 
@@ -760,6 +825,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     insn: IRInst,
     flags: &Flags,
+    isa_flags: &s390x_settings::Flags,
 ) -> CodegenResult<()> {
     let op = ctx.data(insn).opcode();
     let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
@@ -1447,15 +1513,19 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
         Opcode::Bnot => {
             let ty = ty.unwrap();
-            let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            ctx.emit(Inst::AluRRR {
-                alu_op,
-                rd,
-                rn,
-                rm: rn,
-            });
+            if isa_flags.has_mie2() {
+                ctx.emit(Inst::AluRRR {
+                    alu_op: choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64),
+                    rd,
+                    rn,
+                    rm: rn,
+                });
+            } else {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                lower_bnot(ctx, ty, rd);
+            }
         }
 
         Opcode::Band => {
@@ -1517,16 +1587,22 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
         Opcode::BandNot | Opcode::BorNot | Opcode::BxorNot => {
             let ty = ty.unwrap();
-            let alu_op = match op {
-                Opcode::BandNot => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64),
-                Opcode::BorNot => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64),
-                Opcode::BxorNot => choose_32_64(ty, ALUOp::XorNot32, ALUOp::XorNot64),
+            let alu_op = match (op, isa_flags.has_mie2()) {
+                (Opcode::BandNot, true) => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64),
+                (Opcode::BorNot, true) => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64),
+                (Opcode::BxorNot, true) => choose_32_64(ty, ALUOp::XorNot32, ALUOp::XorNot64),
+                (Opcode::BandNot, false) => choose_32_64(ty, ALUOp::And32, ALUOp::And64),
+                (Opcode::BorNot, false) => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
+                (Opcode::BxorNot, false) => choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64),
                 _ => unreachable!(),
             };
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
             ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
+            if !isa_flags.has_mie2() {
+                lower_bnot(ctx, ty, rd);
+            }
         }
 
         Opcode::Bitselect => {
@@ -1542,12 +1618,22 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 rn,
                 rm: rcond,
             });
-            ctx.emit(Inst::AluRRR {
-                alu_op: choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64),
-                rd,
-                rn: rm,
-                rm: rcond,
-            });
+            if isa_flags.has_mie2() {
+                ctx.emit(Inst::AluRRR {
+                    alu_op: choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64),
+                    rd,
+                    rn: rm,
+                    rm: rcond,
+                });
+            } else {
+                ctx.emit(Inst::AluRRR {
+                    alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64),
+                    rd,
+                    rn: rm,
+                    rm: rcond,
+                });
+                lower_bnot(ctx, ty, rd);
+            }
             ctx.emit(Inst::AluRRR {
                 alu_op: choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
                 rd,
@@ -1804,13 +1890,45 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rd,
                     rn,
                 });
-            } else {
+            } else if isa_flags.has_mie2() {
                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
                 ctx.emit(Inst::UnaryRR {
                     op: UnaryOp::PopcntReg,
                     rd,
                     rn,
                 });
+            } else {
+                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                ctx.emit(Inst::UnaryRR {
+                    op: UnaryOp::PopcntByte,
+                    rd,
+                    rn,
+                });
+                let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                let mut shift = ty_bits(ty) as u8;
+                while shift > 8 {
+                    shift = shift / 2;
+                    ctx.emit(Inst::ShiftRR {
+                        shift_op: choose_32_64(ty, ShiftOp::LShL32, ShiftOp::LShL64),
+                        rd: tmp,
+                        rn: rd.to_reg(),
+                        shift_imm: SImm20::maybe_from_i64(shift.into()).unwrap(),
+                        shift_reg: None,
+                    });
+                    ctx.emit(Inst::AluRR {
+                        alu_op: choose_32_64(ty, ALUOp::Add32, ALUOp::Add64),
+                        rd,
+                        rm: tmp.to_reg(),
+                    });
+                }
+                let shift = ty_bits(ty) as u8 - 8;
+                ctx.emit(Inst::ShiftRR {
+                    shift_op: choose_32_64(ty, ShiftOp::LShR32, ShiftOp::LShR64),
+                    rd,
+                    rn: rd.to_reg(),
+                    shift_imm: SImm20::maybe_from_i64(shift.into()).unwrap(),
+                    shift_reg: None,
+                });
             }
         }
 
@@ -2027,40 +2145,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
             let input_ty = ctx.input_ty(insn, 0);
             let output_ty = ctx.output_ty(insn, 0);
-            match (input_ty, output_ty) {
-                (types::I64, types::F64) => {
-                    ctx.emit(Inst::MovToFpr { rd, rn });
-                }
-                (types::F64, types::I64) => {
-                    ctx.emit(Inst::MovFromFpr { rd, rn });
-                }
-                (types::I32, types::F32) => {
-                    let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
-                    ctx.emit(Inst::ShiftRR {
-                        shift_op: ShiftOp::LShL64,
-                        rd: tmp,
-                        rn,
-                        shift_imm: SImm20::maybe_from_i64(32).unwrap(),
-                        shift_reg: None,
-                    });
-                    ctx.emit(Inst::MovToFpr {
-                        rd,
-                        rn: tmp.to_reg(),
-                    });
-                }
-                (types::F32, types::I32) => {
-                    let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
-                    ctx.emit(Inst::MovFromFpr { rd: tmp, rn });
-                    ctx.emit(Inst::ShiftRR {
-                        shift_op: ShiftOp::LShR64,
-                        rd,
-                        rn: tmp.to_reg(),
-                        shift_imm: SImm20::maybe_from_i64(32).unwrap(),
-                        shift_reg: None,
-                    });
-                }
-                _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty),
-            }
+            lower_bitcast(ctx, rd, output_ty, rn, input_ty);
         }
 
         Opcode::Load
@@ -2130,21 +2215,18 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     (64, 32, true, _) => Inst::Load64SExt32 { rd, mem },
                     _ => panic!("Unsupported size in load"),
                 });
-            } else {
-                ctx.emit(match (ext_bits, from_bits, sign_extend, is_float) {
-                    (32, 32, _, true) => Inst::FpuLoadRev32 { rd, mem },
-                    (64, 64, _, true) => Inst::FpuLoadRev64 { rd, mem },
-                    (_, 16, _, false) => Inst::LoadRev16 { rd, mem },
-                    (_, 32, _, false) => Inst::LoadRev32 { rd, mem },
-                    (_, 64, _, false) => Inst::LoadRev64 { rd, mem },
-                    (32, 8, false, _) => Inst::Load32ZExt8 { rd, mem },
-                    (32, 8, true, _) => Inst::Load32SExt8 { rd, mem },
-                    (64, 8, false, _) => Inst::Load64ZExt8 { rd, mem },
-                    (64, 8, true, _) => Inst::Load64SExt8 { rd, mem },
+            } else if !is_float {
+                ctx.emit(match (ext_bits, from_bits, sign_extend) {
+                    (_, 16, _) => Inst::LoadRev16 { rd, mem },
+                    (_, 32, _) => Inst::LoadRev32 { rd, mem },
+                    (_, 64, _) => Inst::LoadRev64 { rd, mem },
+                    (32, 8, false) => Inst::Load32ZExt8 { rd, mem },
+                    (32, 8, true) => Inst::Load32SExt8 { rd, mem },
+                    (64, 8, false) => Inst::Load64ZExt8 { rd, mem },
+                    (64, 8, true) => Inst::Load64SExt8 { rd, mem },
                     _ => panic!("Unsupported size in load"),
                 });
                 if to_bits > from_bits && from_bits > 8 {
-                    assert!(is_float == false);
                     ctx.emit(Inst::Extend {
                         rd,
                         rn: rd.to_reg(),
@@ -2153,6 +2235,26 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                         to_bits: to_bits as u8,
                     });
                 }
-            }
+            } else if isa_flags.has_vxrs_ext2() {
+                ctx.emit(match from_bits {
+                    32 => Inst::FpuLoadRev32 { rd, mem },
+                    64 => Inst::FpuLoadRev64 { rd, mem },
+                    _ => panic!("Unsupported size in load"),
+                });
+            } else {
+                match from_bits {
+                    32 => {
+                        let tmp = ctx.alloc_tmp(types::I32).only_reg().unwrap();
+                        ctx.emit(Inst::LoadRev32 { rd: tmp, mem });
+                        lower_bitcast(ctx, rd, elem_ty, tmp.to_reg(), types::I32);
+                    }
+                    64 => {
+                        let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                        ctx.emit(Inst::LoadRev64 { rd: tmp, mem });
+                        lower_bitcast(ctx, rd, elem_ty, tmp.to_reg(), types::I64);
+                    }
+                    _ => panic!("Unsupported size in load"),
+                }
+            }
         }
 
@@ -2179,13 +2281,39 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
             if ty_is_float(elem_ty) {
                 let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                ctx.emit(match (endianness, ty_bits(elem_ty)) {
-                    (Endianness::Big, 32) => Inst::FpuStore32 { rd, mem },
-                    (Endianness::Big, 64) => Inst::FpuStore64 { rd, mem },
-                    (Endianness::Little, 32) => Inst::FpuStoreRev32 { rd, mem },
-                    (Endianness::Little, 64) => Inst::FpuStoreRev64 { rd, mem },
-                    _ => panic!("Unsupported size in store"),
-                });
+                if endianness == Endianness::Big {
+                    ctx.emit(match ty_bits(elem_ty) {
+                        32 => Inst::FpuStore32 { rd, mem },
+                        64 => Inst::FpuStore64 { rd, mem },
+                        _ => panic!("Unsupported size in store"),
+                    });
+                } else if isa_flags.has_vxrs_ext2() {
+                    ctx.emit(match ty_bits(elem_ty) {
+                        32 => Inst::FpuStoreRev32 { rd, mem },
+                        64 => Inst::FpuStoreRev64 { rd, mem },
+                        _ => panic!("Unsupported size in store"),
+                    });
+                } else {
+                    match ty_bits(elem_ty) {
+                        32 => {
+                            let tmp = ctx.alloc_tmp(types::I32).only_reg().unwrap();
+                            lower_bitcast(ctx, tmp, types::I32, rd, elem_ty);
+                            ctx.emit(Inst::StoreRev32 {
+                                rd: tmp.to_reg(),
+                                mem,
+                            });
+                        }
+                        64 => {
+                            let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                            lower_bitcast(ctx, tmp, types::I64, rd, elem_ty);
+                            ctx.emit(Inst::StoreRev64 {
+                                rd: tmp.to_reg(),
+                                mem,
+                            });
+                        }
+                        _ => panic!("Unsupported size in load"),
+                    }
+                }
             } else if ty_bits(elem_ty) <= 16 {
                 if let Some(imm) = input_matches_const(ctx, inputs[0]) {
                     ctx.emit(match (endianness, ty_bits(elem_ty)) {
@@ -2980,7 +3108,7 @@ impl LowerBackend for S390xBackend {
     type MInst = Inst;
 
     fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
-        lower_insn_to_regs(ctx, ir_inst, &self.flags)
+        lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.isa_flags)
     }
 
     fn lower_branch_group<C: LowerCtx<I = Inst>>(