s390x: use full vector register file for FP operations (#4360)

This defines the full set of 32 128-bit vector registers on s390x.
(Note that the VRs overlap the existing FPRs.)  In addition, this
adds support to use all 32 vector registers to implement floating-
point operations, by using vector floating-point instructions with
the 'W' bit set to operate only on the first element.

This part of the vector instruction set mostly matches the old FP
instruction set, with two exceptions:

- There is no vector version of the COPY SIGN instruction.  Instead,
  now use a VECTOR SELECT with an appropriate bit mask to implement
  the fcopysign operation.

- There are no vector versions of the float <-> int conversion
  instructions where source and target differ in bit size.  Use
  appropriate multiple conversion steps instead.  This also requires
  use of explicit checking to implement correct overflow handling.
  As a side effect, this version now also implements the i8 / i16
  variants of all conversions, which had been missing so far.

For all operations except those two above, we continue to use the
old FP instruction if applicable (i.e. if all operands happen to
have been allocated to the original FP register set), and use the
vector instruction otherwise.
This commit is contained in:
Ulrich Weigand
2022-07-01 01:33:39 +02:00
committed by GitHub
parent f252ae34ec
commit ec83144c88
13 changed files with 3380 additions and 1100 deletions

View File

@@ -296,6 +296,38 @@ pub fn mem_imm16_emit(
}
}
/// Emit a VRX-format memory instruction operating on `rd` at address `mem`.
///
/// The address is first passed through `mem_finalize`; every instruction it
/// returns is emitted ahead of the access itself.  If `add_trap` is set, the
/// finalized address can trap, and the current source location is not the
/// default one, a `HeapOutOfBounds` trap record is added before the access.
///
/// The finalized address is expected to be in `BXD12` form; any other form
/// hits `unreachable!()`.
pub fn mem_vrx_emit(
    rd: Reg,
    mem: &MemArg,
    opcode: u16,
    m3: u8,
    add_trap: bool,
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
) {
    let (setup_insts, final_mem) = mem_finalize(mem, state, true, false, false, true);
    for setup in setup_insts {
        setup.emit(&[], sink, emit_info, state);
    }

    // Only record a trap when requested, when the access can actually trap,
    // and when there is a real source location to attribute it to.
    let may_trap = add_trap && final_mem.can_trap();
    if may_trap && state.cur_srcloc() != SourceLoc::default() {
        sink.add_trap(TrapCode::HeapOutOfBounds);
    }

    if let &MemArg::BXD12 {
        base, index, disp, ..
    } = &final_mem
    {
        let bytes = enc_vrx(opcode, rd, base, index, disp.bits(), m3);
        put(sink, &bytes);
    } else {
        unreachable!();
    }
}
//=============================================================================
// Instructions and subcomponents: emission
@@ -304,15 +336,50 @@ fn machreg_to_gpr(m: Reg) -> u8 {
u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
}
fn machreg_to_fpr(m: Reg) -> u8 {
fn machreg_to_vr(m: Reg) -> u8 {
assert_eq!(m.class(), RegClass::Float);
u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
}
fn machreg_to_gpr_or_fpr(m: Reg) -> u8 {
fn machreg_to_fpr(m: Reg) -> u8 {
assert!(is_fpr(m));
u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
}
fn machreg_to_gpr_or_fpr(m: Reg) -> u8 {
let reg = u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap();
assert!(reg < 16);
reg
}
/// Build the 4-bit `rxb` field used by the vector instruction encoders.
///
/// Each operand slot owns one bit: `v1` -> 8, `v2` -> 4, `v3` -> 2,
/// `v4` -> 1.  A bit is set when the slot holds a register for which
/// `is_fpr` is false; absent (`None`) slots contribute nothing.
// NOTE(review): presumably the bit carries the high bit of a 5-bit vector
// register number, since the encoders mask the number to 4 bits -- confirm
// against the architecture manual.
fn rxb(v1: Option<Reg>, v2: Option<Reg>, v3: Option<Reg>, v4: Option<Reg>) -> u8 {
    let slots = [(v1, 8u8), (v2, 4), (v3, 2), (v4, 1)];
    let mut field = 0u8;
    for &(slot, bit) in slots.iter() {
        if matches!(slot, Some(reg) if !is_fpr(reg)) {
            field |= bit;
        }
    }
    field
}
/// E-type instructions.
///
/// 15
@@ -785,19 +852,45 @@ fn enc_siy(opcode: u16, b1: Reg, d1: u32, i2: u8) -> [u8; 6] {
enc
}
/// VRR-type instructions.
/// VRRa-type instructions.
///
///   47      39 35 31 23 19 15 11  7
///   opcode1 v1 v2 -  m5 m4 m3 rxb opcode2
///        40 36 32 24 20 16 12   8       0
///
/// Returns the six instruction bytes.  `opcode` supplies `opcode1` (its high
/// byte) and `opcode2` (its low byte).  Only the low four bits of each
/// register number and of each mask (`m3`, `m4`, `m5`) are encoded directly;
/// the `rxb` field is computed from `v1` and `v2` by `rxb`.
fn enc_vrr_a(opcode: u16, v1: Reg, v2: Reg, m3: u8, m4: u8, m5: u8) -> [u8; 6] {
    let opcode1 = ((opcode >> 8) & 0xff) as u8;
    let opcode2 = (opcode & 0xff) as u8;
    // Compute rxb from the full registers before they are masked to 4 bits.
    let rxb = rxb(Some(v1), Some(v2), None, None);
    let v1 = machreg_to_vr(v1) & 0x0f;
    let v2 = machreg_to_vr(v2) & 0x0f;
    let m3 = m3 & 0x0f;
    let m4 = m4 & 0x0f;
    let m5 = m5 & 0x0f;

    let mut enc: [u8; 6] = [0; 6];
    enc[0] = opcode1;
    enc[1] = (v1 << 4) | v2;
    enc[2] = 0; // unused field
    enc[3] = (m5 << 4) | m4;
    enc[4] = (m3 << 4) | rxb;
    enc[5] = opcode2;
    enc
}
/// VRRc-type instructions.
///
/// 47 39 35 31 27 23 19 15 11 7
/// opcode1 v1 v2 v3 - m6 m5 m4 rxb opcode2
/// 40 36 32 28 24 20 16 12 8 0
///
fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u8; 6] {
fn enc_vrr_c(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u8; 6] {
let opcode1 = ((opcode >> 8) & 0xff) as u8;
let opcode2 = (opcode & 0xff) as u8;
let rxb = 0; // FIXME
let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME
let v2 = machreg_to_fpr(v2) & 0x0f; // FIXME
let v3 = machreg_to_fpr(v3) & 0x0f; // FIXME
let rxb = rxb(Some(v1), Some(v2), Some(v3), None);
let v1 = machreg_to_vr(v1) & 0x0f;
let v2 = machreg_to_vr(v2) & 0x0f;
let v3 = machreg_to_vr(v3) & 0x0f;
let m4 = m4 & 0x0f;
let m5 = m5 & 0x0f;
let m6 = m6 & 0x0f;
@@ -812,6 +905,87 @@ fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u
enc
}
/// VRRe-type instructions.
///
///   47      39 35 31 27 23 19 15 11  7
///   opcode1 v1 v2 v3 m6 -  m5 v4 rxb opcode2
///        40 36 32 28 24 20 16 12   8       0
///
/// Returns the six instruction bytes.  `opcode` is split into `opcode1`
/// (high byte) and `opcode2` (low byte); register numbers and masks are
/// truncated to four bits, and the `rxb` field is computed from all four
/// register operands by `rxb`.
fn enc_vrr_e(opcode: u16, v1: Reg, v2: Reg, v3: Reg, v4: Reg, m5: u8, m6: u8) -> [u8; 6] {
    let [opcode1, opcode2] = opcode.to_be_bytes();
    // Must be computed from the unmasked registers.
    let rxb_bits = rxb(Some(v1), Some(v2), Some(v3), Some(v4));
    let n1 = machreg_to_vr(v1) & 0x0f;
    let n2 = machreg_to_vr(v2) & 0x0f;
    let n3 = machreg_to_vr(v3) & 0x0f;
    let n4 = machreg_to_vr(v4) & 0x0f;
    [
        opcode1,
        (n1 << 4) | n2,
        (n3 << 4) | (m6 & 0x0f),
        m5 & 0x0f,
        (n4 << 4) | rxb_bits,
        opcode2,
    ]
}
/// VRSb-type instructions.
///
///   47      39 35 31 27 15 11  7
///   opcode1 v1 r3 b2 d2 m4 rxb opcode2
///        40 36 32 28 16 12   8       0
///
/// Returns the six instruction bytes.  `opcode` is split into `opcode1`
/// (high byte) and `opcode2` (low byte).  Only the low 12 bits of `d2` and
/// the low four bits of `m4` and of each register number are encoded; the
/// `rxb` field is computed from `v1` by `rxb`.
fn enc_vrs_b(opcode: u16, v1: Reg, b2: Reg, d2: u32, r3: Reg, m4: u8) -> [u8; 6] {
    let [opcode1, opcode2] = opcode.to_be_bytes();
    // Must be computed from the unmasked register.
    let rxb_bits = rxb(Some(v1), None, None, None);
    let vec_reg = machreg_to_vr(v1) & 0x0f;
    let base_reg = machreg_to_gpr(b2) & 0x0f;
    let gpr3 = machreg_to_gpr(r3) & 0x0f;
    // The 12-bit displacement straddles a byte boundary: top 4 bits, then low 8.
    let disp_hi = ((d2 >> 8) & 0x0f) as u8;
    let disp_lo = (d2 & 0xff) as u8;
    [
        opcode1,
        (vec_reg << 4) | gpr3,
        (base_reg << 4) | disp_hi,
        disp_lo,
        ((m4 & 0x0f) << 4) | rxb_bits,
        opcode2,
    ]
}
/// VRSc-type instructions.
///
///   47      39 35 31 27 15 11  7
///   opcode1 r1 v3 b2 d2 m4 rxb opcode2
///        40 36 32 28 16 12   8       0
///
/// Returns the six instruction bytes.  `opcode` is split into `opcode1`
/// (high byte) and `opcode2` (low byte).  Only the low 12 bits of `d2` and
/// the low four bits of `m4` and of each register number are encoded; the
/// `rxb` field is computed by `rxb` with `v3` in its second operand slot.
fn enc_vrs_c(opcode: u16, r1: Reg, b2: Reg, d2: u32, v3: Reg, m4: u8) -> [u8; 6] {
    let [opcode1, opcode2] = opcode.to_be_bytes();
    // Must be computed from the unmasked register.
    let rxb_bits = rxb(None, Some(v3), None, None);
    let gpr1 = machreg_to_gpr(r1) & 0x0f;
    let base_reg = machreg_to_gpr(b2) & 0x0f;
    let vec_reg = machreg_to_vr(v3) & 0x0f;
    // The 12-bit displacement straddles a byte boundary: top 4 bits, then low 8.
    let disp_hi = ((d2 >> 8) & 0x0f) as u8;
    let disp_lo = (d2 & 0xff) as u8;
    [
        opcode1,
        (gpr1 << 4) | vec_reg,
        (base_reg << 4) | disp_hi,
        disp_lo,
        ((m4 & 0x0f) << 4) | rxb_bits,
        opcode2,
    ]
}
/// VRX-type instructions.
///
/// 47 39 35 31 27 15 11 7
@@ -821,8 +995,8 @@ fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u
fn enc_vrx(opcode: u16, v1: Reg, b2: Reg, x2: Reg, d2: u32, m3: u8) -> [u8; 6] {
let opcode1 = ((opcode >> 8) & 0xff) as u8;
let opcode2 = (opcode & 0xff) as u8;
let rxb = 0; // FIXME
let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME
let rxb = rxb(Some(v1), None, None, None);
let v1 = machreg_to_vr(v1) & 0x0f;
let b2 = machreg_to_gpr(b2) & 0x0f;
let x2 = machreg_to_gpr(x2) & 0x0f;
let d2_lo = (d2 & 0xff) as u8;
@@ -1633,9 +1807,7 @@ impl MachInstEmit for Inst {
| &Inst::Load64SExt32 { rd, ref mem }
| &Inst::LoadRev16 { rd, ref mem }
| &Inst::LoadRev32 { rd, ref mem }
| &Inst::LoadRev64 { rd, ref mem }
| &Inst::FpuLoad32 { rd, ref mem }
| &Inst::FpuLoad64 { rd, ref mem } => {
| &Inst::LoadRev64 { rd, ref mem } => {
let rd = allocs.next_writable(rd);
let mem = mem.with_allocs(&mut allocs);
@@ -1655,8 +1827,6 @@ impl MachInstEmit for Inst {
&Inst::LoadRev16 { .. } => (None, Some(0xe31f), None), // LRVH
&Inst::LoadRev32 { .. } => (None, Some(0xe31e), None), // LRV
&Inst::LoadRev64 { .. } => (None, Some(0xe30f), None), // LRVG
&Inst::FpuLoad32 { .. } => (Some(0x78), Some(0xed64), None), // LE(Y)
&Inst::FpuLoad64 { .. } => (Some(0x68), Some(0xed65), None), // LD(Y)
_ => unreachable!(),
};
let rd = rd.to_reg();
@@ -1664,36 +1834,27 @@ impl MachInstEmit for Inst {
rd, &mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
);
}
&Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => {
&Inst::FpuLoad32 { rd, ref mem }
| &Inst::FpuLoad64 { rd, ref mem }
| &Inst::FpuLoadRev32 { rd, ref mem }
| &Inst::FpuLoadRev64 { rd, ref mem } => {
let rd = allocs.next_writable(rd);
let mem = mem.with_allocs(&mut allocs);
let opcode = match self {
&Inst::FpuLoadRev32 { .. } => 0xe603, // VLEBRF
&Inst::FpuLoadRev64 { .. } => 0xe602, // VLEBRG
let (opcode_rx, opcode_rxy, opcode_vrx) = match self {
&Inst::FpuLoad32 { .. } => (Some(0x78), Some(0xed64), 0xe703), // LE(Y), VLEF
&Inst::FpuLoad64 { .. } => (Some(0x68), Some(0xed65), 0xe702), // LD(Y), VLEG
&Inst::FpuLoadRev32 { .. } => (None, None, 0xe603), // VLEBRF
&Inst::FpuLoadRev64 { .. } => (None, None, 0xe602), // VLEBRG
_ => unreachable!(),
};
let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true);
for inst in mem_insts.into_iter() {
inst.emit(&[], sink, emit_info, state);
}
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && mem.can_trap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
match &mem {
&MemArg::BXD12 {
base, index, disp, ..
} => {
put(
sink,
&enc_vrx(opcode, rd.to_reg(), base, index, disp.bits(), 0),
);
}
_ => unreachable!(),
let rd = rd.to_reg();
if is_fpr(rd) && opcode_rx.is_some() {
mem_emit(
rd, &mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state,
);
} else {
mem_vrx_emit(rd, &mem, opcode_vrx, 0, true, sink, emit_info, state);
}
}
@@ -1703,9 +1864,7 @@ impl MachInstEmit for Inst {
| &Inst::Store64 { rd, ref mem }
| &Inst::StoreRev16 { rd, ref mem }
| &Inst::StoreRev32 { rd, ref mem }
| &Inst::StoreRev64 { rd, ref mem }
| &Inst::FpuStore32 { rd, ref mem }
| &Inst::FpuStore64 { rd, ref mem } => {
| &Inst::StoreRev64 { rd, ref mem } => {
let rd = allocs.next(rd);
let mem = mem.with_allocs(&mut allocs);
@@ -1717,8 +1876,6 @@ impl MachInstEmit for Inst {
&Inst::StoreRev16 { .. } => (None, Some(0xe33f), None), // STRVH
&Inst::StoreRev32 { .. } => (None, Some(0xe33e), None), // STRV
&Inst::StoreRev64 { .. } => (None, Some(0xe32f), None), // STRVG
&Inst::FpuStore32 { .. } => (Some(0x70), Some(0xed66), None), // STE(Y)
&Inst::FpuStore64 { .. } => (Some(0x60), Some(0xed67), None), // STD(Y)
_ => unreachable!(),
};
mem_emit(
@@ -1747,33 +1904,26 @@ impl MachInstEmit for Inst {
};
mem_imm16_emit(imm, &mem, opcode, true, sink, emit_info, state);
}
&Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => {
&Inst::FpuStore32 { rd, ref mem }
| &Inst::FpuStore64 { rd, ref mem }
| &Inst::FpuStoreRev32 { rd, ref mem }
| &Inst::FpuStoreRev64 { rd, ref mem } => {
let rd = allocs.next(rd);
let mem = mem.with_allocs(&mut allocs);
let opcode = match self {
&Inst::FpuStoreRev32 { .. } => 0xe60b, // VSTEBRF
&Inst::FpuStoreRev64 { .. } => 0xe60a, // VSTEBRG
let (opcode_rx, opcode_rxy, opcode_vrx) = match self {
&Inst::FpuStore32 { .. } => (Some(0x70), Some(0xed66), 0xe70b), // STE(Y), VSTEF
&Inst::FpuStore64 { .. } => (Some(0x60), Some(0xed67), 0xe70a), // STD(Y), VSTEG
&Inst::FpuStoreRev32 { .. } => (None, None, 0xe60b), // VSTEBRF
&Inst::FpuStoreRev64 { .. } => (None, None, 0xe60a), // VSTEBRG
_ => unreachable!(),
};
let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true);
for inst in mem_insts.into_iter() {
inst.emit(&[], sink, emit_info, state);
}
let srcloc = state.cur_srcloc();
if srcloc != SourceLoc::default() && mem.can_trap() {
sink.add_trap(TrapCode::HeapOutOfBounds);
}
match &mem {
&MemArg::BXD12 {
base, index, disp, ..
} => {
put(sink, &enc_vrx(opcode, rd, base, index, disp.bits(), 0));
}
_ => unreachable!(),
if is_fpr(rd) && opcode_rx.is_some() {
mem_emit(
rd, &mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state,
);
} else {
mem_vrx_emit(rd, &mem, opcode_vrx, 0, true, sink, emit_info, state);
}
}
@@ -1966,47 +2116,95 @@ impl MachInstEmit for Inst {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = 0x38; // LER
put(sink, &enc_rr(opcode, rd.to_reg(), rn));
if is_fpr(rd.to_reg()) && is_fpr(rn) {
let opcode = 0x38; // LER
put(sink, &enc_rr(opcode, rd.to_reg(), rn));
} else {
let opcode = 0xe756; // VLR
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0));
}
}
&Inst::FpuMove64 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = 0x28; // LDR
put(sink, &enc_rr(opcode, rd.to_reg(), rn));
if is_fpr(rd.to_reg()) && is_fpr(rn) {
let opcode = 0x28; // LDR
put(sink, &enc_rr(opcode, rd.to_reg(), rn));
} else {
let opcode = 0xe756; // VLR
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, 0, 0, 0));
}
}
&Inst::FpuCMov32 { rd, cond, rm } => {
let rd = allocs.next_writable(rd);
let rm = allocs.next(rm);
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
let opcode = 0x38; // LER
put(sink, &enc_rr(opcode, rd.to_reg(), rm));
if is_fpr(rd.to_reg()) && is_fpr(rm) {
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
let opcode = 0x38; // LER
put(sink, &enc_rr(opcode, rd.to_reg(), rm));
} else {
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 6));
let opcode = 0xe756; // VLR
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0));
}
}
&Inst::FpuCMov64 { rd, cond, rm } => {
let rd = allocs.next_writable(rd);
let rm = allocs.next(rm);
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
let opcode = 0x28; // LDR
put(sink, &enc_rr(opcode, rd.to_reg(), rm));
if is_fpr(rd.to_reg()) && is_fpr(rm) {
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
let opcode = 0x28; // LDR
put(sink, &enc_rr(opcode, rd.to_reg(), rm));
} else {
let opcode = 0xa74; // BCR
put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 6));
let opcode = 0xe756; // VLR
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rm, 0, 0, 0));
}
}
&Inst::MovToFpr { rd, rn } => {
&Inst::MovToFpr32 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = 0xb3c1; // LDGR
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
let (opcode, m4) = (0xe722, 2); // VLVG
put(sink, &enc_vrs_b(opcode, rd.to_reg(), zero_reg(), 0, rn, m4));
}
&Inst::MovFromFpr { rd, rn } => {
&Inst::MovToFpr64 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = 0xb3cd; // LGDR
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
if is_fpr(rd.to_reg()) {
let opcode = 0xb3c1; // LDGR
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
} else {
let (opcode, m4) = (0xe722, 3); // VLVG
put(sink, &enc_vrs_b(opcode, rd.to_reg(), zero_reg(), 0, rn, m4));
}
}
&Inst::MovFromFpr32 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (opcode, m4) = (0xe721, 2); // VLGV
put(sink, &enc_vrs_c(opcode, rd.to_reg(), zero_reg(), 0, rn, m4));
}
&Inst::MovFromFpr64 { rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
// Use the FP-register form when `is_fpr(rn)` holds; otherwise use the
// vector form with m4 = 3.
if is_fpr(rn) {
let opcode = 0xb3cd; // LGDR
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
} else {
let (opcode, m4) = (0xe721, 3); // VLGV
put(sink, &enc_vrs_c(opcode, rd.to_reg(), zero_reg(), 0, rn, m4));
}
}
&Inst::LoadFpuConst32 { rd, const_data } => {
let rd = allocs.next_writable(rd);
@@ -2034,138 +2232,143 @@ impl MachInstEmit for Inst {
};
inst.emit(&[], sink, emit_info, state);
}
&Inst::FpuCopysign { rd, rn, rm } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let opcode = 0xb372; // CPSDR
put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0));
}
&Inst::FpuRR { fpu_op, rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = match fpu_op {
FPUOp1::Abs32 => 0xb300, // LPEBR
FPUOp1::Abs64 => 0xb310, // LPDBR
FPUOp1::Neg32 => 0xb303, // LCEBR
FPUOp1::Neg64 => 0xb313, // LCDBR
FPUOp1::NegAbs32 => 0xb301, // LNEBR
FPUOp1::NegAbs64 => 0xb311, // LNDBR
FPUOp1::Sqrt32 => 0xb314, // SQEBR
FPUOp1::Sqrt64 => 0xb315, // SQDBR
FPUOp1::Cvt32To64 => 0xb304, // LDEBR
FPUOp1::Cvt64To32 => 0xb344, // LEDBR
let (opcode, m3, m5, opcode_fpr) = match fpu_op {
FPUOp1::Abs32 => (0xe7cc, 2, 2, 0xb300), // VFPSO, LPEBR
FPUOp1::Abs64 => (0xe7cc, 3, 2, 0xb310), // VFPSO, LPDBR
FPUOp1::Neg32 => (0xe7cc, 2, 0, 0xb303), // VFPSO, LCEBR
FPUOp1::Neg64 => (0xe7cc, 3, 0, 0xb313), // VFPSO, LCDBR
FPUOp1::NegAbs32 => (0xe7cc, 2, 1, 0xb301), // VFPSO, LNEBR
FPUOp1::NegAbs64 => (0xe7cc, 3, 1, 0xb311), // VFPSO, LNDBR
FPUOp1::Sqrt32 => (0xe7ce, 2, 0, 0xb314), // VFSQ, SQEBR
FPUOp1::Sqrt64 => (0xe7ce, 3, 0, 0xb315), // VFSQ, SQDBR
FPUOp1::Cvt32To64 => (0xe7c4, 2, 0, 0xb304), // VFLL, LDEBR
};
put(sink, &enc_rre(opcode, rd.to_reg(), rn));
if is_fpr(rd.to_reg()) && is_fpr(rn) {
put(sink, &enc_rre(opcode_fpr, rd.to_reg(), rn));
} else {
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, m3, 8, m5));
}
}
&Inst::FpuRRR { fpu_op, rd, rm } => {
let rd = allocs.next_writable(rd);
let rm = allocs.next(rm);
let opcode = match fpu_op {
FPUOp2::Add32 => 0xb30a, // AEBR
FPUOp2::Add64 => 0xb31a, // ADBR
FPUOp2::Sub32 => 0xb30b, // SEBR
FPUOp2::Sub64 => 0xb31b, // SDBR
FPUOp2::Mul32 => 0xb317, // MEEBR
FPUOp2::Mul64 => 0xb31c, // MDBR
FPUOp2::Div32 => 0xb30d, // DEBR
FPUOp2::Div64 => 0xb31d, // DDBR
_ => unimplemented!(),
};
put(sink, &enc_rre(opcode, rd.to_reg(), rm));
}
&Inst::FpuRRRR { fpu_op, rd, rn, rm } => {
&Inst::FpuRRR { fpu_op, rd, rn, rm } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let opcode = match fpu_op {
FPUOp3::MAdd32 => 0xb30e, // MAEBR
FPUOp3::MAdd64 => 0xb31e, // MADBR
FPUOp3::MSub32 => 0xb30f, // MSEBR
FPUOp3::MSub64 => 0xb31f, // MSDBR
let (opcode, m4, m6, opcode_fpr) = match fpu_op {
FPUOp2::Add32 => (0xe7e3, 2, 0, Some(0xb30a)), // VFA, AEBR
FPUOp2::Add64 => (0xe7e3, 3, 0, Some(0xb31a)), // VFA, ADBR
FPUOp2::Sub32 => (0xe7e2, 2, 0, Some(0xb30b)), // VFS, SEBR
FPUOp2::Sub64 => (0xe7e2, 3, 0, Some(0xb31b)), // VFS, SDBR
FPUOp2::Mul32 => (0xe7e7, 2, 0, Some(0xb317)), // VFM, MEEBR
FPUOp2::Mul64 => (0xe7e7, 3, 0, Some(0xb31c)), // VFM, MDBR
FPUOp2::Div32 => (0xe7e5, 2, 0, Some(0xb30d)), // VFD, DEBR
FPUOp2::Div64 => (0xe7e5, 3, 0, Some(0xb31d)), // VFD, DDBR
FPUOp2::Max32 => (0xe7ef, 2, 1, None), // VFMAX
FPUOp2::Max64 => (0xe7ef, 3, 1, None), // VFMAX
FPUOp2::Min32 => (0xe7ee, 2, 1, None), // VFMIN
FPUOp2::Min64 => (0xe7ee, 3, 1, None), // VFMIN
};
put(sink, &enc_rrd(opcode, rd.to_reg(), rm, rn));
if opcode_fpr.is_some() && rd.to_reg() == rn && is_fpr(rn) && is_fpr(rm) {
put(sink, &enc_rre(opcode_fpr.unwrap(), rd.to_reg(), rm));
} else {
put(sink, &enc_vrr_c(opcode, rd.to_reg(), rn, rm, m4, 8, m6));
}
}
&Inst::FpuToInt { op, rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = match op {
FpuToIntOp::F32ToI32 => 0xb398, // CFEBRA
FpuToIntOp::F32ToU32 => 0xb39c, // CLFEBR
FpuToIntOp::F32ToI64 => 0xb3a8, // CGEBRA
FpuToIntOp::F32ToU64 => 0xb3ac, // CLGEBR
FpuToIntOp::F64ToI32 => 0xb399, // CFDBRA
FpuToIntOp::F64ToU32 => 0xb39d, // CLFDBR
FpuToIntOp::F64ToI64 => 0xb3a9, // CGDBRA
FpuToIntOp::F64ToU64 => 0xb3ad, // CLGDBR
};
put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 5, 0));
}
&Inst::IntToFpu { op, rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let opcode = match op {
IntToFpuOp::I32ToF32 => 0xb394, // CEFBRA
IntToFpuOp::U32ToF32 => 0xb390, // CELFBR
IntToFpuOp::I64ToF32 => 0xb3a4, // CEGBRA
IntToFpuOp::U64ToF32 => 0xb3a0, // CELGBR
IntToFpuOp::I32ToF64 => 0xb395, // CDFBRA
IntToFpuOp::U32ToF64 => 0xb391, // CDLFBR
IntToFpuOp::I64ToF64 => 0xb3a5, // CDGBRA
IntToFpuOp::U64ToF64 => 0xb3a1, // CDLGBR
};
put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 0, 0));
}
&Inst::FpuRound { op, rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (opcode, m3) = match op {
FpuRoundMode::Minus32 => (0xb357, 7), // FIEBR
FpuRoundMode::Minus64 => (0xb35f, 7), // FIDBR
FpuRoundMode::Plus32 => (0xb357, 6), // FIEBR
FpuRoundMode::Plus64 => (0xb35f, 6), // FIDBR
FpuRoundMode::Zero32 => (0xb357, 5), // FIEBR
FpuRoundMode::Zero64 => (0xb35f, 5), // FIDBR
FpuRoundMode::Nearest32 => (0xb357, 4), // FIEBR
FpuRoundMode::Nearest64 => (0xb35f, 4), // FIDBR
};
put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, m3, 0));
}
&Inst::FpuVecRRR { fpu_op, rd, rn, rm } => {
&Inst::FpuRRRR {
fpu_op,
rd,
rn,
rm,
ra,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let ra = allocs.next(ra);
let (opcode, m4) = match fpu_op {
FPUOp2::Max32 => (0xe7ef, 2), // VFMAX
FPUOp2::Max64 => (0xe7ef, 3), // VFMAX
FPUOp2::Min32 => (0xe7ee, 2), // VFMIN
FPUOp2::Min64 => (0xe7ee, 3), // VFMIN
_ => unimplemented!(),
let (opcode, m6, opcode_fpr) = match fpu_op {
FPUOp3::MAdd32 => (0xe78f, 2, 0xb30e), // VFMA, MAEBR
FPUOp3::MAdd64 => (0xe78f, 3, 0xb31e), // VFMA, MADBR
FPUOp3::MSub32 => (0xe78e, 2, 0xb30f), // VFMS, MSEBR
FPUOp3::MSub64 => (0xe78e, 3, 0xb31f), // VFMS, MSDBR
};
put(sink, &enc_vrr(opcode, rd.to_reg(), rn, rm, m4, 8, 1));
if rd.to_reg() == ra && is_fpr(rn) && is_fpr(rm) && is_fpr(ra) {
put(sink, &enc_rrd(opcode_fpr, rd.to_reg(), rm, rn));
} else {
put(sink, &enc_vrr_e(opcode, rd.to_reg(), rn, rm, ra, 8, m6));
}
}
&Inst::FpuRound { op, mode, rd, rn } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let mode = match mode {
FpuRoundMode::Current => 0,
FpuRoundMode::ToNearest => 1,
FpuRoundMode::ShorterPrecision => 3,
FpuRoundMode::ToNearestTiesToEven => 4,
FpuRoundMode::ToZero => 5,
FpuRoundMode::ToPosInfinity => 6,
FpuRoundMode::ToNegInfinity => 7,
};
let (opcode, m3, opcode_fpr) = match op {
FpuRoundOp::Cvt64To32 => (0xe7c5, 3, Some(0xb344)), // VFLR, LEDBR(A)
FpuRoundOp::Round32 => (0xe7c7, 2, Some(0xb357)), // VFI, FIEBR
FpuRoundOp::Round64 => (0xe7c7, 3, Some(0xb35f)), // VFI, FIDBR
FpuRoundOp::ToSInt32 => (0xe7c2, 2, None), // VCSFP
FpuRoundOp::ToSInt64 => (0xe7c2, 3, None), // VCSFP
FpuRoundOp::ToUInt32 => (0xe7c0, 2, None), // VCLFP
FpuRoundOp::ToUInt64 => (0xe7c0, 3, None), // VCLFP
FpuRoundOp::FromSInt32 => (0xe7c3, 2, None), // VCFPS
FpuRoundOp::FromSInt64 => (0xe7c3, 3, None), // VCFPS
FpuRoundOp::FromUInt32 => (0xe7c1, 2, None), // VCFPL
FpuRoundOp::FromUInt64 => (0xe7c1, 3, None), // VCFPL
};
if opcode_fpr.is_some() && is_fpr(rd.to_reg()) && is_fpr(rn) {
put(
sink,
&enc_rrf_cde(opcode_fpr.unwrap(), rd.to_reg(), rn, mode, 0),
);
} else {
put(sink, &enc_vrr_a(opcode, rd.to_reg(), rn, m3, 8, mode));
}
}
&Inst::FpuCmp32 { rn, rm } => {
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let opcode = 0xb309; // CEBR
put(sink, &enc_rre(opcode, rn, rm));
if is_fpr(rn) && is_fpr(rm) {
let opcode = 0xb309; // CEBR
put(sink, &enc_rre(opcode, rn, rm));
} else {
let opcode = 0xe7cb; // WFC
put(sink, &enc_vrr_a(opcode, rn, rm, 2, 0, 0));
}
}
&Inst::FpuCmp64 { rn, rm } => {
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let opcode = 0xb319; // CDBR
put(sink, &enc_rre(opcode, rn, rm));
if is_fpr(rn) && is_fpr(rm) {
let opcode = 0xb319; // CDBR
put(sink, &enc_rre(opcode, rn, rm));
} else {
let opcode = 0xe7cb; // WFC
put(sink, &enc_vrr_a(opcode, rn, rm, 3, 0, 0));
}
}
&Inst::VecSelect { rd, rn, rm, ra } => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let rm = allocs.next(rm);
let ra = allocs.next(ra);
let opcode = 0xe78d; // VSEL
put(sink, &enc_vrr_e(opcode, rd.to_reg(), rn, rm, ra, 0, 0));
}
&Inst::Call { link, ref info } => {

File diff suppressed because it is too large Load Diff

View File

@@ -27,8 +27,8 @@ mod emit_tests;
// Instructions (top level): definition
pub use crate::isa::s390x::lower::isle::generated_code::{
ALUOp, CmpOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuToIntOp, IntToFpuOp, MInst as Inst,
RxSBGOp, ShiftOp, UnaryOp,
ALUOp, CmpOp, FPUOp1, FPUOp2, FPUOp3, FpuRoundMode, FpuRoundOp, MInst as Inst, RxSBGOp,
ShiftOp, UnaryOp,
};
/// Additional information for (direct) Call instructions, left out of line to lower the size of
@@ -156,12 +156,13 @@ impl Inst {
| Inst::FpuMove64 { .. }
| Inst::FpuCMov32 { .. }
| Inst::FpuCMov64 { .. }
| Inst::MovToFpr { .. }
| Inst::MovFromFpr { .. }
| Inst::MovToFpr32 { .. }
| Inst::MovToFpr64 { .. }
| Inst::MovFromFpr32 { .. }
| Inst::MovFromFpr64 { .. }
| Inst::FpuRR { .. }
| Inst::FpuRRR { .. }
| Inst::FpuRRRR { .. }
| Inst::FpuCopysign { .. }
| Inst::FpuCmp32 { .. }
| Inst::FpuCmp64 { .. }
| Inst::FpuLoad32 { .. }
@@ -170,10 +171,7 @@ impl Inst {
| Inst::FpuStore64 { .. }
| Inst::LoadFpuConst32 { .. }
| Inst::LoadFpuConst64 { .. }
| Inst::FpuToInt { .. }
| Inst::IntToFpu { .. }
| Inst::FpuRound { .. }
| Inst::FpuVecRRR { .. }
| Inst::VecSelect { .. }
| Inst::Call { .. }
| Inst::CallInd { .. }
| Inst::Ret { .. }
@@ -206,6 +204,11 @@ impl Inst {
UnaryOp::PopcntReg => InstructionSet::MIE2,
_ => InstructionSet::Base,
},
Inst::FpuRound { op, .. } => match op {
FpuRoundOp::ToSInt32 | FpuRoundOp::FromSInt32 => InstructionSet::MIE2,
FpuRoundOp::ToUInt32 | FpuRoundOp::FromUInt32 => InstructionSet::MIE2,
_ => InstructionSet::Base,
},
// These are all part of VXRS_EXT2
Inst::FpuLoadRev32 { .. }
@@ -576,7 +579,10 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_mod(rd);
collector.reg_use(rm);
}
&Inst::MovToFpr { rd, rn } | &Inst::MovFromFpr { rd, rn } => {
&Inst::MovToFpr32 { rd, rn }
| &Inst::MovToFpr64 { rd, rn }
| &Inst::MovFromFpr32 { rd, rn }
| &Inst::MovFromFpr64 { rd, rn } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
@@ -584,20 +590,17 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::FpuRRR { rd, rm, .. } => {
collector.reg_mod(rd);
collector.reg_use(rm);
}
&Inst::FpuRRRR { rd, rn, rm, .. } => {
collector.reg_mod(rd);
collector.reg_use(rn);
collector.reg_use(rm);
}
&Inst::FpuCopysign { rd, rn, rm, .. } => {
&Inst::FpuRRR { rd, rn, rm, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
}
&Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
collector.reg_use(ra);
}
&Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
collector.reg_use(rn);
collector.reg_use(rm);
@@ -637,22 +640,15 @@ fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandC
&Inst::LoadFpuConst32 { rd, .. } | &Inst::LoadFpuConst64 { rd, .. } => {
collector.reg_def(rd);
}
&Inst::FpuToInt { rd, rn, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::IntToFpu { rd, rn, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::FpuRound { rd, rn, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
}
&Inst::FpuVecRRR { rd, rn, rm, .. } => {
&Inst::VecSelect { rd, rn, rm, ra, .. } => {
collector.reg_def(rd);
collector.reg_use(rn);
collector.reg_use(rm);
collector.reg_use(ra);
}
&Inst::Extend { rd, rn, .. } => {
collector.reg_def(rd);
@@ -1462,9 +1458,7 @@ impl Inst {
| &Inst::Load64SExt32 { rd, ref mem }
| &Inst::LoadRev16 { rd, ref mem }
| &Inst::LoadRev32 { rd, ref mem }
| &Inst::LoadRev64 { rd, ref mem }
| &Inst::FpuLoad32 { rd, ref mem }
| &Inst::FpuLoad64 { rd, ref mem } => {
| &Inst::LoadRev64 { rd, ref mem } => {
let (opcode_rx, opcode_rxy, opcode_ril) = match self {
&Inst::Load32 { .. } => (Some("l"), Some("ly"), Some("lrl")),
&Inst::Load32ZExt8 { .. } => (None, Some("llc"), None),
@@ -1481,8 +1475,6 @@ impl Inst {
&Inst::LoadRev16 { .. } => (None, Some("lrvh"), None),
&Inst::LoadRev32 { .. } => (None, Some("lrv"), None),
&Inst::LoadRev64 { .. } => (None, Some("lrvg"), None),
&Inst::FpuLoad32 { .. } => (Some("le"), Some("ley"), None),
&Inst::FpuLoad64 { .. } => (Some("ld"), Some("ldy"), None),
_ => unreachable!(),
};
@@ -1505,17 +1497,42 @@ impl Inst {
let mem = mem.pretty_print_default();
format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem)
}
&Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let mem = mem.with_allocs(allocs);
let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true);
let op = match self {
&Inst::FpuLoadRev32 { .. } => "vlebrf",
&Inst::FpuLoadRev64 { .. } => "vlebrg",
&Inst::FpuLoad32 { rd, ref mem }
| &Inst::FpuLoad64 { rd, ref mem }
| &Inst::FpuLoadRev32 { rd, ref mem }
| &Inst::FpuLoadRev64 { rd, ref mem } => {
let (opcode_rx, opcode_rxy, opcode_vrx) = match self {
&Inst::FpuLoad32 { .. } => (Some("le"), Some("ley"), "vlef"),
&Inst::FpuLoad64 { .. } => (Some("ld"), Some("ldy"), "vleg"),
&Inst::FpuLoadRev32 { .. } => (None, None, "vlebrf"),
&Inst::FpuLoadRev64 { .. } => (None, None, "vlebrg"),
_ => unreachable!(),
};
let mem = mem.pretty_print_default();
format!("{}{} {}, {}, 0", mem_str, op, rd, mem)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let mem = mem.with_allocs(allocs);
if rd_fpr.is_some() && opcode_rx.is_some() {
let (mem_str, mem) =
mem_finalize_for_show(&mem, state, true, true, false, true);
let op = match &mem {
&MemArg::BXD12 { .. } => opcode_rx,
&MemArg::BXD20 { .. } => opcode_rxy,
_ => unreachable!(),
};
let mem = mem.pretty_print_default();
format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem)
} else {
let (mem_str, mem) =
mem_finalize_for_show(&mem, state, true, false, false, true);
let mem = mem.pretty_print_default();
format!(
"{}{} {}, {}, 0",
mem_str,
opcode_vrx,
rd_fpr.unwrap_or(rd),
mem
)
}
}
&Inst::Store8 { rd, ref mem }
| &Inst::Store16 { rd, ref mem }
@@ -1523,9 +1540,7 @@ impl Inst {
| &Inst::Store64 { rd, ref mem }
| &Inst::StoreRev16 { rd, ref mem }
| &Inst::StoreRev32 { rd, ref mem }
| &Inst::StoreRev64 { rd, ref mem }
| &Inst::FpuStore32 { rd, ref mem }
| &Inst::FpuStore64 { rd, ref mem } => {
| &Inst::StoreRev64 { rd, ref mem } => {
let (opcode_rx, opcode_rxy, opcode_ril) = match self {
&Inst::Store8 { .. } => (Some("stc"), Some("stcy"), None),
&Inst::Store16 { .. } => (Some("sth"), Some("sthy"), Some("sthrl")),
@@ -1534,8 +1549,6 @@ impl Inst {
&Inst::StoreRev16 { .. } => (None, Some("strvh"), None),
&Inst::StoreRev32 { .. } => (None, Some("strv"), None),
&Inst::StoreRev64 { .. } => (None, Some("strvg"), None),
&Inst::FpuStore32 { .. } => (Some("ste"), Some("stey"), None),
&Inst::FpuStore64 { .. } => (Some("std"), Some("stdy"), None),
_ => unreachable!(),
};
@@ -1586,18 +1599,42 @@ impl Inst {
format!("{}{} {}, {}", mem_str, op, mem, imm)
}
&Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => {
let rd = pretty_print_reg(rd, allocs);
let mem = mem.with_allocs(allocs);
let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true);
let op = match self {
&Inst::FpuStoreRev32 { .. } => "vstebrf",
&Inst::FpuStoreRev64 { .. } => "vstebrg",
&Inst::FpuStore32 { rd, ref mem }
| &Inst::FpuStore64 { rd, ref mem }
| &Inst::FpuStoreRev32 { rd, ref mem }
| &Inst::FpuStoreRev64 { rd, ref mem } => {
let (opcode_rx, opcode_rxy, opcode_vrx) = match self {
&Inst::FpuStore32 { .. } => (Some("ste"), Some("stey"), "vstef"),
&Inst::FpuStore64 { .. } => (Some("std"), Some("stdy"), "vsteg"),
&Inst::FpuStoreRev32 { .. } => (None, None, "vstebrf"),
&Inst::FpuStoreRev64 { .. } => (None, None, "vstebrg"),
_ => unreachable!(),
};
let mem = mem.pretty_print_default();
format!("{}{} {}, {}, 0", mem_str, op, rd, mem)
let (rd, rd_fpr) = pretty_print_fpr(rd, allocs);
let mem = mem.with_allocs(allocs);
if rd_fpr.is_some() && opcode_rx.is_some() {
let (mem_str, mem) =
mem_finalize_for_show(&mem, state, true, true, false, true);
let op = match &mem {
&MemArg::BXD12 { .. } => opcode_rx,
&MemArg::BXD20 { .. } => opcode_rxy,
_ => unreachable!(),
};
let mem = mem.pretty_print_default();
format!("{}{} {}, {}", mem_str, op.unwrap(), rd_fpr.unwrap(), mem)
} else {
let (mem_str, mem) =
mem_finalize_for_show(&mem, state, true, false, false, true);
let mem = mem.pretty_print_default();
format!(
"{}{} {}, {}, 0",
mem_str,
opcode_vrx,
rd_fpr.unwrap_or(rd),
mem
)
}
}
&Inst::LoadMultiple64 { rt, rt2, ref mem } => {
let mem = mem.with_allocs(allocs);
@@ -1704,177 +1741,278 @@ impl Inst {
format!("locghi{} {}, {}", cond, rd, imm)
}
&Inst::FpuMove32 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("ler {}, {}", rd, rn)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if rd_fpr.is_some() && rn_fpr.is_some() {
format!("ler {}, {}", rd_fpr.unwrap(), rn_fpr.unwrap())
} else {
format!("vlr {}, {}", rd, rn)
}
}
&Inst::FpuMove64 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("ldr {}, {}", rd, rn)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if rd_fpr.is_some() && rn_fpr.is_some() {
format!("ldr {}, {}", rd_fpr.unwrap(), rn_fpr.unwrap())
} else {
format!("vlr {}, {}", rd, rn)
}
}
&Inst::FpuCMov32 { rd, cond, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rm = pretty_print_reg(rm, allocs);
let cond = cond.invert().pretty_print_default();
format!("j{} 6 ; ler {}, {}", cond, rd, rm)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rd_fpr.is_some() && rm_fpr.is_some() {
let cond = cond.invert().pretty_print_default();
format!("j{} 6 ; ler {}, {}", cond, rd_fpr.unwrap(), rm_fpr.unwrap())
} else {
let cond = cond.invert().pretty_print_default();
format!("j{} 10 ; vlr {}, {}", cond, rd, rm)
}
}
&Inst::FpuCMov64 { rd, cond, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rm = pretty_print_reg(rm, allocs);
let cond = cond.invert().pretty_print_default();
format!("j{} 6 ; ldr {}, {}", cond, rd, rm)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rd_fpr.is_some() && rm_fpr.is_some() {
let cond = cond.invert().pretty_print_default();
format!("j{} 6 ; ldr {}, {}", cond, rd_fpr.unwrap(), rm_fpr.unwrap())
} else {
let cond = cond.invert().pretty_print_default();
format!("j{} 10 ; vlr {}, {}", cond, rd, rm)
}
}
&Inst::MovToFpr { rd, rn } => {
&Inst::MovToFpr32 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("ldgr {}, {}", rd, rn)
format!("vlvgf {}, {}, 0", rd, rn)
}
&Inst::MovFromFpr { rd, rn } => {
&Inst::MovToFpr64 { rd, rn } => {
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
if rd_fpr.is_some() {
format!("ldgr {}, {}", rd_fpr.unwrap(), rn)
} else {
format!("vlvgg {}, {}, 0", rd, rn)
}
}
&Inst::MovFromFpr32 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("lgdr {}, {}", rd, rn)
format!("vlgvf {}, {}, 0", rd, rn)
}
&Inst::MovFromFpr64 { rd, rn } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if rn_fpr.is_some() {
format!("lgdr {}, {}", rd, rn_fpr.unwrap())
} else {
format!("vlgvg {}, {}, 0", rd, rn)
}
}
&Inst::FpuRR { fpu_op, rd, rn } => {
let op = match fpu_op {
FPUOp1::Abs32 => "lpebr",
FPUOp1::Abs64 => "lpdbr",
FPUOp1::Neg32 => "lcebr",
FPUOp1::Neg64 => "lcdbr",
FPUOp1::NegAbs32 => "lnebr",
FPUOp1::NegAbs64 => "lndbr",
FPUOp1::Sqrt32 => "sqebr",
FPUOp1::Sqrt64 => "sqdbr",
FPUOp1::Cvt32To64 => "ldebr",
FPUOp1::Cvt64To32 => "ledbr",
let (op, op_fpr) = match fpu_op {
FPUOp1::Abs32 => ("wflpsb", "lpebr"),
FPUOp1::Abs64 => ("wflpdb", "lpdbr"),
FPUOp1::Neg32 => ("wflcsb", "lcebr"),
FPUOp1::Neg64 => ("wflcdb", "lcdbr"),
FPUOp1::NegAbs32 => ("wflnsb", "lnebr"),
FPUOp1::NegAbs64 => ("wflndb", "lndbr"),
FPUOp1::Sqrt32 => ("wfsqsb", "sqebr"),
FPUOp1::Sqrt64 => ("wfsqdb", "sqdbr"),
FPUOp1::Cvt32To64 => ("wldeb", "ldebr"),
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("{} {}, {}", op, rd, rn)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if rd_fpr.is_some() && rn_fpr.is_some() {
format!("{} {}, {}", op_fpr, rd_fpr.unwrap(), rn_fpr.unwrap())
} else {
format!("{} {}, {}", op, rd_fpr.unwrap_or(rd), rn_fpr.unwrap_or(rn))
}
}
&Inst::FpuRRR { fpu_op, rd, rm } => {
let op = match fpu_op {
FPUOp2::Add32 => "aebr",
FPUOp2::Add64 => "adbr",
FPUOp2::Sub32 => "sebr",
FPUOp2::Sub64 => "sdbr",
FPUOp2::Mul32 => "meebr",
FPUOp2::Mul64 => "mdbr",
FPUOp2::Div32 => "debr",
FPUOp2::Div64 => "ddbr",
_ => unimplemented!(),
&Inst::FpuRRR { fpu_op, rd, rn, rm } => {
let (op, opt_m6, op_fpr) = match fpu_op {
FPUOp2::Add32 => ("wfasb", "", Some("aebr")),
FPUOp2::Add64 => ("wfadb", "", Some("adbr")),
FPUOp2::Sub32 => ("wfssb", "", Some("sebr")),
FPUOp2::Sub64 => ("wfsdb", "", Some("sdbr")),
FPUOp2::Mul32 => ("wfmsb", "", Some("meebr")),
FPUOp2::Mul64 => ("wfmdb", "", Some("mdbr")),
FPUOp2::Div32 => ("wfdsb", "", Some("debr")),
FPUOp2::Div64 => ("wfddb", "", Some("ddbr")),
FPUOp2::Max32 => ("wfmaxsb", ", 1", None),
FPUOp2::Max64 => ("wfmaxdb", ", 1", None),
FPUOp2::Min32 => ("wfminsb", ", 1", None),
FPUOp2::Min64 => ("wfmindb", ", 1", None),
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rm = pretty_print_reg(rm, allocs);
format!("{} {}, {}", op, rd, rm)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if op_fpr.is_some() && rd == rn && rd_fpr.is_some() && rm_fpr.is_some() {
format!(
"{} {}, {}",
op_fpr.unwrap(),
rd_fpr.unwrap(),
rm_fpr.unwrap()
)
} else {
format!(
"{} {}, {}, {}{}",
op,
rd_fpr.unwrap_or(rd),
rn_fpr.unwrap_or(rn),
rm_fpr.unwrap_or(rm),
opt_m6
)
}
}
&Inst::FpuRRRR { fpu_op, rd, rn, rm } => {
let op = match fpu_op {
FPUOp3::MAdd32 => "maebr",
FPUOp3::MAdd64 => "madbr",
FPUOp3::MSub32 => "msebr",
FPUOp3::MSub64 => "msdbr",
&Inst::FpuRRRR {
fpu_op,
rd,
rn,
rm,
ra,
} => {
let (op, op_fpr) = match fpu_op {
FPUOp3::MAdd32 => ("wfmasb", "maebr"),
FPUOp3::MAdd64 => ("wfmadb", "madbr"),
FPUOp3::MSub32 => ("wfmssb", "msebr"),
FPUOp3::MSub64 => ("wfmsdb", "msdbr"),
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("{} {}, {}, {}", op, rd, rn, rm)
}
&Inst::FpuCopysign { rd, rn, rm } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("cpsdr {}, {}, {}", rd, rm, rn)
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
let (ra, ra_fpr) = pretty_print_fpr(ra, allocs);
if rd == ra && rd_fpr.is_some() && rn_fpr.is_some() && rm_fpr.is_some() {
format!(
"{} {}, {}, {}",
op_fpr,
rd_fpr.unwrap(),
rn_fpr.unwrap(),
rm_fpr.unwrap()
)
} else {
format!(
"{} {}, {}, {}, {}",
op,
rd_fpr.unwrap_or(rd),
rn_fpr.unwrap_or(rn),
rm_fpr.unwrap_or(rm),
ra_fpr.unwrap_or(ra)
)
}
}
&Inst::FpuCmp32 { rn, rm } => {
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("cebr {}, {}", rn, rm)
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rn_fpr.is_some() && rm_fpr.is_some() {
format!("cebr {}, {}", rn_fpr.unwrap(), rm_fpr.unwrap())
} else {
format!("wfcsb {}, {}", rn_fpr.unwrap_or(rn), rm_fpr.unwrap_or(rm))
}
}
&Inst::FpuCmp64 { rn, rm } => {
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("cdbr {}, {}", rn, rm)
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
let (rm, rm_fpr) = pretty_print_fpr(rm, allocs);
if rn_fpr.is_some() && rm_fpr.is_some() {
format!("cdbr {}, {}", rn_fpr.unwrap(), rm_fpr.unwrap())
} else {
format!("wfcdb {}, {}", rn_fpr.unwrap_or(rn), rm_fpr.unwrap_or(rm))
}
}
&Inst::LoadFpuConst32 { rd, const_data } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs);
format!(
"bras {}, 8 ; data.f32 {} ; le {}, 0({})",
tmp,
f32::from_bits(const_data),
rd,
tmp
)
if rd_fpr.is_some() {
format!(
"bras {}, 8 ; data.f32 {} ; le {}, 0({})",
tmp,
f32::from_bits(const_data),
rd_fpr.unwrap(),
tmp
)
} else {
format!(
"bras {}, 8 ; data.f32 {} ; vlef {}, 0({}), 0",
tmp,
f32::from_bits(const_data),
rd,
tmp
)
}
}
&Inst::LoadFpuConst64 { rd, const_data } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs);
format!(
"bras {}, 12 ; data.f64 {} ; ld {}, 0({})",
tmp,
f64::from_bits(const_data),
rd,
tmp
)
if rd_fpr.is_some() {
format!(
"bras {}, 12 ; data.f64 {} ; ld {}, 0({})",
tmp,
f64::from_bits(const_data),
rd_fpr.unwrap(),
tmp
)
} else {
format!(
"bras {}, 12 ; data.f64 {} ; vleg {}, 0({}), 0",
tmp,
f64::from_bits(const_data),
rd,
tmp
)
}
}
&Inst::FpuToInt { op, rd, rn } => {
let op = match op {
FpuToIntOp::F32ToI32 => "cfebra",
FpuToIntOp::F32ToU32 => "clfebr",
FpuToIntOp::F32ToI64 => "cgebra",
FpuToIntOp::F32ToU64 => "clgebr",
FpuToIntOp::F64ToI32 => "cfdbra",
FpuToIntOp::F64ToU32 => "clfdbr",
FpuToIntOp::F64ToI64 => "cgdbra",
FpuToIntOp::F64ToU64 => "clgdbr",
&Inst::FpuRound { op, mode, rd, rn } => {
let mode = match mode {
FpuRoundMode::Current => 0,
FpuRoundMode::ToNearest => 1,
FpuRoundMode::ShorterPrecision => 3,
FpuRoundMode::ToNearestTiesToEven => 4,
FpuRoundMode::ToZero => 5,
FpuRoundMode::ToPosInfinity => 6,
FpuRoundMode::ToNegInfinity => 7,
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("{} {}, 5, {}, 0", op, rd, rn)
let (opcode, opcode_fpr) = match op {
FpuRoundOp::Cvt64To32 => ("wledb", Some("ledbra")),
FpuRoundOp::Round32 => ("wfisb", Some("fiebr")),
FpuRoundOp::Round64 => ("wfidb", Some("fidbr")),
FpuRoundOp::ToSInt32 => ("wcfeb", None),
FpuRoundOp::ToSInt64 => ("wcgdb", None),
FpuRoundOp::ToUInt32 => ("wclfeb", None),
FpuRoundOp::ToUInt64 => ("wclgdb", None),
FpuRoundOp::FromSInt32 => ("wcefb", None),
FpuRoundOp::FromSInt64 => ("wcdgb", None),
FpuRoundOp::FromUInt32 => ("wcelfb", None),
FpuRoundOp::FromUInt64 => ("wcdlgb", None),
};
let (rd, rd_fpr) = pretty_print_fpr(rd.to_reg(), allocs);
let (rn, rn_fpr) = pretty_print_fpr(rn, allocs);
if opcode_fpr.is_some() && rd_fpr.is_some() && rn_fpr.is_some() {
format!(
"{} {}, {}, {}",
opcode_fpr.unwrap(),
rd_fpr.unwrap(),
rn_fpr.unwrap(),
mode
)
} else {
format!(
"{} {}, {}, 0, {}",
opcode,
rd_fpr.unwrap_or(rd),
rn_fpr.unwrap_or(rn),
mode
)
}
}
&Inst::IntToFpu { op, rd, rn } => {
let op = match op {
IntToFpuOp::I32ToF32 => "cefbra",
IntToFpuOp::U32ToF32 => "celfbr",
IntToFpuOp::I64ToF32 => "cegbra",
IntToFpuOp::U64ToF32 => "celgbr",
IntToFpuOp::I32ToF64 => "cdfbra",
IntToFpuOp::U32ToF64 => "cdlfbr",
IntToFpuOp::I64ToF64 => "cdgbra",
IntToFpuOp::U64ToF64 => "cdlgbr",
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("{} {}, 0, {}, 0", op, rd, rn)
}
&Inst::FpuRound { op, rd, rn } => {
let (op, m3) = match op {
FpuRoundMode::Minus32 => ("fiebr", 7),
FpuRoundMode::Minus64 => ("fidbr", 7),
FpuRoundMode::Plus32 => ("fiebr", 6),
FpuRoundMode::Plus64 => ("fidbr", 6),
FpuRoundMode::Zero32 => ("fiebr", 5),
FpuRoundMode::Zero64 => ("fidbr", 5),
FpuRoundMode::Nearest32 => ("fiebr", 4),
FpuRoundMode::Nearest64 => ("fidbr", 4),
};
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
format!("{} {}, {}, {}", op, rd, rn, m3)
}
&Inst::FpuVecRRR { fpu_op, rd, rn, rm } => {
let op = match fpu_op {
FPUOp2::Max32 => "wfmaxsb",
FPUOp2::Max64 => "wfmaxdb",
FPUOp2::Min32 => "wfminsb",
FPUOp2::Min64 => "wfmindb",
_ => unimplemented!(),
};
&Inst::VecSelect { rd, rn, rm, ra } => {
let rd = pretty_print_reg(rd.to_reg(), allocs);
let rn = pretty_print_reg(rn, allocs);
let rm = pretty_print_reg(rm, allocs);
format!("{} {}, {}, {}, 1", op, rd, rn, rm)
let ra = pretty_print_reg(ra, allocs);
format!("vsel {}, {}, {}, {}", rd, rn, rm, ra)
}
&Inst::Extend {
rd,

View File

@@ -27,21 +27,28 @@ pub fn writable_gpr(num: u8) -> Writable<Reg> {
Writable::from_reg(gpr(num))
}
/// Get a reference to a FPR (floating-point register).
pub fn fpr(num: u8) -> Reg {
let preg = fpr_preg(num);
/// Get a reference to a VR (vector register).
///
/// `num` is passed to `vr_preg`, which asserts that it is below 32; the
/// resulting index is wrapped as a `VReg` in the float register class.
pub fn vr(num: u8) -> Reg {
    let index = vr_preg(num).index();
    Reg::from(VReg::new(index, RegClass::Float))
}
pub(crate) const fn fpr_preg(num: u8) -> PReg {
assert!(num < 16);
/// Build the `PReg` in the float register class whose index is `num`.
///
/// Asserts that `num` is below 32.
pub(crate) const fn vr_preg(num: u8) -> PReg {
    assert!(num < 32);
    // Widening `u8 -> usize` cast; `as` is used because this is a `const fn`.
    let index = num as usize;
    PReg::new(index, RegClass::Float)
}
/// Get a writable reference to a FPR.
/// Get a writable reference to a VR.
#[allow(dead_code)] // used by tests.
pub fn writable_fpr(num: u8) -> Writable<Reg> {
Writable::from_reg(fpr(num))
pub fn writable_vr(num: u8) -> Writable<Reg> {
    // Wrap the register returned by `vr(num)` in `Writable`.
    Writable::from_reg(vr(num))
}
/// Test whether a vector register is overlapping an FPR.
///
/// Returns `true` when the register's hardware encoding is below 16.
///
/// # Panics
///
/// Panics if `r.to_real_reg()` returns `None`, or if the register's class
/// is not `RegClass::Float`.
pub fn is_fpr(r: Reg) -> bool {
    let real = r.to_real_reg().unwrap();
    assert!(real.class() == RegClass::Float);
    real.hw_enc() < 16
}
/// Get a reference to the stack-pointer register.
@@ -92,14 +99,30 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv {
preg(gpr(5)),
],
vec![
preg(fpr(0)),
preg(fpr(1)),
preg(fpr(2)),
preg(fpr(3)),
preg(fpr(4)),
preg(fpr(5)),
preg(fpr(6)),
preg(fpr(7)),
preg(vr(0)),
preg(vr(1)),
preg(vr(2)),
preg(vr(3)),
preg(vr(4)),
preg(vr(5)),
preg(vr(6)),
preg(vr(7)),
preg(vr(16)),
preg(vr(17)),
preg(vr(18)),
preg(vr(19)),
preg(vr(20)),
preg(vr(21)),
preg(vr(22)),
preg(vr(23)),
preg(vr(24)),
preg(vr(25)),
preg(vr(26)),
preg(vr(27)),
preg(vr(28)),
preg(vr(29)),
preg(vr(30)),
preg(vr(31)),
],
],
non_preferred_regs_by_class: [
@@ -116,14 +139,14 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv {
// no r15; it is the stack pointer.
],
vec![
preg(fpr(8)),
preg(fpr(9)),
preg(fpr(10)),
preg(fpr(11)),
preg(fpr(12)),
preg(fpr(13)),
preg(fpr(14)),
preg(fpr(15)),
preg(vr(8)),
preg(vr(9)),
preg(vr(10)),
preg(vr(11)),
preg(vr(12)),
preg(vr(13)),
preg(vr(14)),
preg(vr(15)),
],
],
fixed_stack_slots: vec![],
@@ -134,14 +157,28 @@ pub fn show_reg(reg: Reg) -> String {
if let Some(rreg) = reg.to_real_reg() {
match rreg.class() {
RegClass::Int => format!("%r{}", rreg.hw_enc()),
RegClass::Float => format!("%f{}", rreg.hw_enc()),
RegClass::Float => format!("%v{}", rreg.hw_enc()),
}
} else {
format!("%{:?}", reg)
}
}
/// Return the `%f<N>` name of `reg` when it overlaps an FPR.
///
/// Yields `None` when `reg.to_real_reg()` returns `None` or when `is_fpr`
/// reports `false`; otherwise `N` is the register's hardware encoding.
pub fn maybe_show_fpr(reg: Reg) -> Option<String> {
    let rreg = reg.to_real_reg()?;
    if is_fpr(reg) {
        Some(format!("%f{}", rreg.hw_enc()))
    } else {
        None
    }
}
/// Take the next allocation for `reg` from `allocs` and format it with `show_reg`.
pub fn pretty_print_reg(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> String {
    show_reg(allocs.next(reg))
}
/// Take the next allocation for `reg` from `allocs` and format it two ways.
///
/// The first element is the name produced by `show_reg`; the second is the
/// optional name produced by `maybe_show_fpr` for the same register.
pub fn pretty_print_fpr(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> (String, Option<String>) {
    let allocated = allocs.next(reg);
    let full_name = show_reg(allocated);
    let fpr_name = maybe_show_fpr(allocated);
    (full_name, fpr_name)
}

View File

@@ -45,7 +45,7 @@ pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
Register(14),
Register(15),
];
const FPR_MAP: [gimli::Register; 16] = [
const VR_MAP: [gimli::Register; 32] = [
Register(16),
Register(20),
Register(17),
@@ -62,11 +62,27 @@ pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
Register(30),
Register(27),
Register(31),
Register(68),
Register(72),
Register(69),
Register(73),
Register(70),
Register(74),
Register(71),
Register(75),
Register(76),
Register(80),
Register(77),
Register(81),
Register(78),
Register(82),
Register(79),
Register(83),
];
match reg.class() {
RegClass::Int => Ok(GPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]),
RegClass::Float => Ok(FPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]),
RegClass::Float => Ok(VR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]),
}
}