AArch64: Add various missing SIMD bits
In addition, improve the code for stack pointer manipulation.

Copyright (c) 2020, Arm Limited.
This commit is contained in:
@@ -311,11 +311,12 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
|
|
||||||
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
|
fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
|
||||||
let mut insts = SmallVec::new();
|
let mut insts = SmallVec::new();
|
||||||
insts.push(Inst::AluRRR {
|
insts.push(Inst::AluRRRExtend {
|
||||||
alu_op: ALUOp::SubS64XR,
|
alu_op: ALUOp::SubS64,
|
||||||
rd: writable_zero_reg(),
|
rd: writable_zero_reg(),
|
||||||
rn: stack_reg(),
|
rn: stack_reg(),
|
||||||
rm: limit_reg,
|
rm: limit_reg,
|
||||||
|
extendop: ExtendOp::UXTX,
|
||||||
});
|
});
|
||||||
insts.push(Inst::TrapIf {
|
insts.push(Inst::TrapIf {
|
||||||
trap_info: (ir::SourceLoc::default(), ir::TrapCode::StackOverflow),
|
trap_info: (ir::SourceLoc::default(), ir::TrapCode::StackOverflow),
|
||||||
@@ -373,10 +374,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
ret.push(adj_inst);
|
ret.push(adj_inst);
|
||||||
} else {
|
} else {
|
||||||
let tmp = writable_spilltmp_reg();
|
let tmp = writable_spilltmp_reg();
|
||||||
let const_inst = Inst::LoadConst64 {
|
let const_inst = Inst::load_constant(tmp, amount);
|
||||||
rd: tmp,
|
|
||||||
const_data: amount,
|
|
||||||
};
|
|
||||||
let adj_inst = Inst::AluRRRExtend {
|
let adj_inst = Inst::AluRRRExtend {
|
||||||
alu_op,
|
alu_op,
|
||||||
rd: writable_stack_reg(),
|
rd: writable_stack_reg(),
|
||||||
@@ -384,7 +382,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
|
|||||||
rm: tmp.to_reg(),
|
rm: tmp.to_reg(),
|
||||||
extendop: ExtendOp::UXTX,
|
extendop: ExtendOp::UXTX,
|
||||||
};
|
};
|
||||||
ret.push(const_inst);
|
ret.extend(const_inst);
|
||||||
ret.push(adj_inst);
|
ret.push(adj_inst);
|
||||||
}
|
}
|
||||||
ret
|
ret
|
||||||
|
|||||||
@@ -575,7 +575,7 @@ impl ScalarSize {
|
|||||||
32 => ScalarSize::Size32,
|
32 => ScalarSize::Size32,
|
||||||
64 => ScalarSize::Size64,
|
64 => ScalarSize::Size64,
|
||||||
128 => ScalarSize::Size128,
|
128 => ScalarSize::Size128,
|
||||||
_ => panic!("Unexpected type width"),
|
w => panic!("Unexpected type width: {}", w),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -591,7 +591,7 @@ impl ScalarSize {
|
|||||||
ScalarSize::Size16 => 0b11,
|
ScalarSize::Size16 => 0b11,
|
||||||
ScalarSize::Size32 => 0b00,
|
ScalarSize::Size32 => 0b00,
|
||||||
ScalarSize::Size64 => 0b01,
|
ScalarSize::Size64 => 0b01,
|
||||||
_ => panic!("Unexpected scalar FP operand size"),
|
_ => panic!("Unexpected scalar FP operand size: {:?}", self),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -612,6 +612,7 @@ impl VectorSize {
|
|||||||
/// Convert from a type into a vector operand size.
|
/// Convert from a type into a vector operand size.
|
||||||
pub fn from_ty(ty: Type) -> VectorSize {
|
pub fn from_ty(ty: Type) -> VectorSize {
|
||||||
match ty {
|
match ty {
|
||||||
|
B32X4 => VectorSize::Size32x4,
|
||||||
F32X2 => VectorSize::Size32x2,
|
F32X2 => VectorSize::Size32x2,
|
||||||
F32X4 => VectorSize::Size32x4,
|
F32X4 => VectorSize::Size32x4,
|
||||||
F64X2 => VectorSize::Size64x2,
|
F64X2 => VectorSize::Size64x2,
|
||||||
@@ -622,7 +623,7 @@ impl VectorSize {
|
|||||||
I32X2 => VectorSize::Size32x2,
|
I32X2 => VectorSize::Size32x2,
|
||||||
I32X4 => VectorSize::Size32x4,
|
I32X4 => VectorSize::Size32x4,
|
||||||
I64X2 => VectorSize::Size64x2,
|
I64X2 => VectorSize::Size64x2,
|
||||||
_ => unimplemented!(),
|
_ => unimplemented!("Unsupported type: {}", ty),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -497,7 +497,6 @@ impl MachInstEmit for Inst {
|
|||||||
ALUOp::AddS64 => 0b10101011_000,
|
ALUOp::AddS64 => 0b10101011_000,
|
||||||
ALUOp::SubS32 => 0b01101011_000,
|
ALUOp::SubS32 => 0b01101011_000,
|
||||||
ALUOp::SubS64 => 0b11101011_000,
|
ALUOp::SubS64 => 0b11101011_000,
|
||||||
ALUOp::SubS64XR => 0b11101011_001,
|
|
||||||
ALUOp::SDiv64 => 0b10011010_110,
|
ALUOp::SDiv64 => 0b10011010_110,
|
||||||
ALUOp::UDiv64 => 0b10011010_110,
|
ALUOp::UDiv64 => 0b10011010_110,
|
||||||
ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
|
ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
|
||||||
@@ -512,17 +511,13 @@ impl MachInstEmit for Inst {
|
|||||||
ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
|
ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
|
||||||
ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
|
ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
|
||||||
ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
|
ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
|
||||||
ALUOp::SubS64XR => 0b011000,
|
|
||||||
ALUOp::SMulH | ALUOp::UMulH => 0b011111,
|
ALUOp::SMulH | ALUOp::UMulH => 0b011111,
|
||||||
_ => 0b000000,
|
_ => 0b000000,
|
||||||
};
|
};
|
||||||
debug_assert_ne!(writable_stack_reg(), rd);
|
debug_assert_ne!(writable_stack_reg(), rd);
|
||||||
// The stack pointer is the zero register if this instruction
|
// The stack pointer is the zero register in this context, so this might be an
|
||||||
// doesn't have access to extended registers, so this might be
|
// indication that something is wrong.
|
||||||
// an indication that something is wrong.
|
debug_assert_ne!(stack_reg(), rn);
|
||||||
if alu_op != ALUOp::SubS64XR {
|
|
||||||
debug_assert_ne!(stack_reg(), rn);
|
|
||||||
}
|
|
||||||
debug_assert_ne!(stack_reg(), rm);
|
debug_assert_ne!(stack_reg(), rm);
|
||||||
sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
|
sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
|
||||||
}
|
}
|
||||||
@@ -2079,19 +2074,6 @@ impl MachInstEmit for Inst {
|
|||||||
// disable the worst-case-size check in this case.
|
// disable the worst-case-size check in this case.
|
||||||
start_off = sink.cur_offset();
|
start_off = sink.cur_offset();
|
||||||
}
|
}
|
||||||
&Inst::LoadConst64 { rd, const_data } => {
|
|
||||||
let inst = Inst::ULoad64 {
|
|
||||||
rd,
|
|
||||||
mem: AMode::Label(MemLabel::PCRel(8)),
|
|
||||||
srcloc: None, // can't cause a user trap.
|
|
||||||
};
|
|
||||||
inst.emit(sink, flags, state);
|
|
||||||
let inst = Inst::Jump {
|
|
||||||
dest: BranchTarget::ResolvedOffset(12),
|
|
||||||
};
|
|
||||||
inst.emit(sink, flags, state);
|
|
||||||
sink.put8(const_data);
|
|
||||||
}
|
|
||||||
&Inst::LoadExtName {
|
&Inst::LoadExtName {
|
||||||
rd,
|
rd,
|
||||||
ref name,
|
ref name,
|
||||||
|
|||||||
@@ -777,14 +777,15 @@ fn test_aarch64_binemit() {
|
|||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::AluRRR {
|
Inst::AluRRRExtend {
|
||||||
alu_op: ALUOp::SubS64XR,
|
alu_op: ALUOp::SubS64,
|
||||||
rd: writable_zero_reg(),
|
rd: writable_zero_reg(),
|
||||||
rn: stack_reg(),
|
rn: stack_reg(),
|
||||||
rm: xreg(12),
|
rm: xreg(12),
|
||||||
|
extendop: ExtendOp::UXTX,
|
||||||
},
|
},
|
||||||
"FF632CEB",
|
"FF632CEB",
|
||||||
"subs xzr, sp, x12",
|
"subs xzr, sp, x12, UXTX",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
|
|||||||
@@ -45,15 +45,11 @@ pub enum ALUOp {
|
|||||||
Sub64,
|
Sub64,
|
||||||
Orr32,
|
Orr32,
|
||||||
Orr64,
|
Orr64,
|
||||||
/// NOR
|
|
||||||
OrrNot32,
|
OrrNot32,
|
||||||
/// NOR
|
|
||||||
OrrNot64,
|
OrrNot64,
|
||||||
And32,
|
And32,
|
||||||
And64,
|
And64,
|
||||||
/// NAND
|
|
||||||
AndNot32,
|
AndNot32,
|
||||||
/// NAND
|
|
||||||
AndNot64,
|
AndNot64,
|
||||||
/// XOR (AArch64 calls this "EOR")
|
/// XOR (AArch64 calls this "EOR")
|
||||||
Eor32,
|
Eor32,
|
||||||
@@ -71,8 +67,6 @@ pub enum ALUOp {
|
|||||||
SubS32,
|
SubS32,
|
||||||
/// Sub, setting flags
|
/// Sub, setting flags
|
||||||
SubS64,
|
SubS64,
|
||||||
/// Sub, setting flags, using extended registers
|
|
||||||
SubS64XR,
|
|
||||||
/// Signed multiply, high-word result
|
/// Signed multiply, high-word result
|
||||||
SMulH,
|
SMulH,
|
||||||
/// Unsigned multiply, high-word result
|
/// Unsigned multiply, high-word result
|
||||||
@@ -1078,12 +1072,6 @@ pub enum Inst {
|
|||||||
rtmp2: Writable<Reg>,
|
rtmp2: Writable<Reg>,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Load an inline constant.
|
|
||||||
LoadConst64 {
|
|
||||||
rd: Writable<Reg>,
|
|
||||||
const_data: u64,
|
|
||||||
},
|
|
||||||
|
|
||||||
/// Load an inline symbol reference.
|
/// Load an inline symbol reference.
|
||||||
LoadExtName {
|
LoadExtName {
|
||||||
rd: Writable<Reg>,
|
rd: Writable<Reg>,
|
||||||
@@ -1309,7 +1297,22 @@ impl Inst {
|
|||||||
mem,
|
mem,
|
||||||
srcloc: None,
|
srcloc: None,
|
||||||
},
|
},
|
||||||
_ => unimplemented!("gen_load({})", ty),
|
_ => {
|
||||||
|
if ty.is_vector() {
|
||||||
|
let bits = ty_bits(ty);
|
||||||
|
let rd = into_reg;
|
||||||
|
let srcloc = None;
|
||||||
|
|
||||||
|
if bits == 128 {
|
||||||
|
Inst::FpuLoad128 { rd, mem, srcloc }
|
||||||
|
} else {
|
||||||
|
assert_eq!(bits, 64);
|
||||||
|
Inst::FpuLoad64 { rd, mem, srcloc }
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
unimplemented!("gen_load({})", ty);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1346,7 +1349,22 @@ impl Inst {
|
|||||||
mem,
|
mem,
|
||||||
srcloc: None,
|
srcloc: None,
|
||||||
},
|
},
|
||||||
_ => unimplemented!("gen_store({})", ty),
|
_ => {
|
||||||
|
if ty.is_vector() {
|
||||||
|
let bits = ty_bits(ty);
|
||||||
|
let rd = from_reg;
|
||||||
|
let srcloc = None;
|
||||||
|
|
||||||
|
if bits == 128 {
|
||||||
|
Inst::FpuStore128 { rd, mem, srcloc }
|
||||||
|
} else {
|
||||||
|
assert_eq!(bits, 64);
|
||||||
|
Inst::FpuStore64 { rd, mem, srcloc }
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
unimplemented!("gen_store({})", ty);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1736,7 +1754,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_def(rtmp1);
|
collector.add_def(rtmp1);
|
||||||
collector.add_def(rtmp2);
|
collector.add_def(rtmp2);
|
||||||
}
|
}
|
||||||
&Inst::LoadConst64 { rd, .. } | &Inst::LoadExtName { rd, .. } => {
|
&Inst::LoadExtName { rd, .. } => {
|
||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
}
|
}
|
||||||
&Inst::LoadAddr { rd, mem: _ } => {
|
&Inst::LoadAddr { rd, mem: _ } => {
|
||||||
@@ -2427,9 +2445,6 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_def(mapper, rtmp1);
|
map_def(mapper, rtmp1);
|
||||||
map_def(mapper, rtmp2);
|
map_def(mapper, rtmp2);
|
||||||
}
|
}
|
||||||
&mut Inst::LoadConst64 { ref mut rd, .. } => {
|
|
||||||
map_def(mapper, rd);
|
|
||||||
}
|
|
||||||
&mut Inst::LoadExtName { ref mut rd, .. } => {
|
&mut Inst::LoadExtName { ref mut rd, .. } => {
|
||||||
map_def(mapper, rd);
|
map_def(mapper, rd);
|
||||||
}
|
}
|
||||||
@@ -2632,7 +2647,6 @@ impl Inst {
|
|||||||
ALUOp::AddS64 => ("adds", OperandSize::Size64),
|
ALUOp::AddS64 => ("adds", OperandSize::Size64),
|
||||||
ALUOp::SubS32 => ("subs", OperandSize::Size32),
|
ALUOp::SubS32 => ("subs", OperandSize::Size32),
|
||||||
ALUOp::SubS64 => ("subs", OperandSize::Size64),
|
ALUOp::SubS64 => ("subs", OperandSize::Size64),
|
||||||
ALUOp::SubS64XR => ("subs", OperandSize::Size64),
|
|
||||||
ALUOp::SMulH => ("smulh", OperandSize::Size64),
|
ALUOp::SMulH => ("smulh", OperandSize::Size64),
|
||||||
ALUOp::UMulH => ("umulh", OperandSize::Size64),
|
ALUOp::UMulH => ("umulh", OperandSize::Size64),
|
||||||
ALUOp::SDiv64 => ("sdiv", OperandSize::Size64),
|
ALUOp::SDiv64 => ("sdiv", OperandSize::Size64),
|
||||||
@@ -3535,10 +3549,6 @@ impl Inst {
|
|||||||
info.targets
|
info.targets
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
&Inst::LoadConst64 { rd, const_data } => {
|
|
||||||
let rd = rd.show_rru(mb_rru);
|
|
||||||
format!("ldr {}, 8 ; b 12 ; data {:?}", rd, const_data)
|
|
||||||
}
|
|
||||||
&Inst::LoadExtName {
|
&Inst::LoadExtName {
|
||||||
rd,
|
rd,
|
||||||
ref name,
|
ref name,
|
||||||
|
|||||||
@@ -105,7 +105,8 @@ block0(v0: i64):
|
|||||||
; nextln: add x16, x0, x17, UXTX
|
; nextln: add x16, x0, x17, UXTX
|
||||||
; nextln: subs xzr, sp, x16
|
; nextln: subs xzr, sp, x16
|
||||||
; nextln: b.hs 8 ; udf
|
; nextln: b.hs 8 ; udf
|
||||||
; nextln: ldr x16, 8 ; b 12 ; data 400000
|
; nextln: movz w16, #6784
|
||||||
|
; nextln: movk w16, #6, LSL #16
|
||||||
; nextln: sub sp, sp, x16, UXTX
|
; nextln: sub sp, sp, x16, UXTX
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
@@ -154,7 +155,8 @@ block0(v0: i64):
|
|||||||
; nextln: add x16, x16, x17, UXTX
|
; nextln: add x16, x16, x17, UXTX
|
||||||
; nextln: subs xzr, sp, x16
|
; nextln: subs xzr, sp, x16
|
||||||
; nextln: b.hs 8 ; udf
|
; nextln: b.hs 8 ; udf
|
||||||
; nextln: ldr x16, 8 ; b 12 ; data 400000
|
; nextln: movz w16, #6784
|
||||||
|
; nextln: movk w16, #6, LSL #16
|
||||||
; nextln: sub sp, sp, x16, UXTX
|
; nextln: sub sp, sp, x16, UXTX
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
|
|||||||
@@ -29,7 +29,8 @@ block0:
|
|||||||
|
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: ldr x16, 8 ; b 12 ; data 100016
|
; nextln: movz w16, #34480
|
||||||
|
; nextln: movk w16, #1, LSL #16
|
||||||
; nextln: sub sp, sp, x16, UXTX
|
; nextln: sub sp, sp, x16, UXTX
|
||||||
; nextln: mov x0, sp
|
; nextln: mov x0, sp
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
@@ -68,7 +69,8 @@ block0:
|
|||||||
|
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: ldr x16, 8 ; b 12 ; data 100016
|
; nextln: movz w16, #34480
|
||||||
|
; nextln: movk w16, #1, LSL #16
|
||||||
; nextln: sub sp, sp, x16, UXTX
|
; nextln: sub sp, sp, x16, UXTX
|
||||||
; nextln: mov x0, sp
|
; nextln: mov x0, sp
|
||||||
; nextln: ldr x0, [x0]
|
; nextln: ldr x0, [x0]
|
||||||
@@ -106,7 +108,8 @@ block0(v0: i64):
|
|||||||
|
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: ldr x16, 8 ; b 12 ; data 100016
|
; nextln: movz w16, #34480
|
||||||
|
; nextln: movk w16, #1, LSL #16
|
||||||
; nextln: sub sp, sp, x16, UXTX
|
; nextln: sub sp, sp, x16, UXTX
|
||||||
; nextln: mov x1, sp
|
; nextln: mov x1, sp
|
||||||
; nextln: str x0, [x1]
|
; nextln: str x0, [x1]
|
||||||
|
|||||||
Reference in New Issue
Block a user