arm64: Use FPU instructions for Fcopysign
Copyright (c) 2020, Arm Limited.
This commit is contained in:
@@ -85,12 +85,12 @@ pub fn u64_constant(bits: u64) -> ConstantData {
|
|||||||
// Instructions and subcomponents: emission
|
// Instructions and subcomponents: emission
|
||||||
|
|
||||||
fn machreg_to_gpr(m: Reg) -> u32 {
|
fn machreg_to_gpr(m: Reg) -> u32 {
|
||||||
assert!(m.get_class() == RegClass::I64);
|
assert_eq!(m.get_class(), RegClass::I64);
|
||||||
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn machreg_to_vec(m: Reg) -> u32 {
|
fn machreg_to_vec(m: Reg) -> u32 {
|
||||||
assert!(m.get_class() == RegClass::V128);
|
assert_eq!(m.get_class(), RegClass::V128);
|
||||||
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -948,6 +948,44 @@ impl MachInstEmit for Inst {
|
|||||||
};
|
};
|
||||||
sink.put4(enc_fpurrr(top22, rd, rn, rm));
|
sink.put4(enc_fpurrr(top22, rd, rn, rm));
|
||||||
}
|
}
|
||||||
|
&Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op {
|
||||||
|
FPUOpRI::UShr32(imm) => {
|
||||||
|
debug_assert_eq!(32, imm.lane_size_in_bits);
|
||||||
|
sink.put4(
|
||||||
|
0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000
|
||||||
|
| imm.enc() << 16
|
||||||
|
| machreg_to_vec(rn) << 5
|
||||||
|
| machreg_to_vec(rd.to_reg()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
FPUOpRI::UShr64(imm) => {
|
||||||
|
debug_assert_eq!(64, imm.lane_size_in_bits);
|
||||||
|
sink.put4(
|
||||||
|
0b01_1_111110_0000000_00_0_0_0_1_00000_00000
|
||||||
|
| imm.enc() << 16
|
||||||
|
| machreg_to_vec(rn) << 5
|
||||||
|
| machreg_to_vec(rd.to_reg()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
FPUOpRI::Sli64(imm) => {
|
||||||
|
debug_assert_eq!(64, imm.lane_size_in_bits);
|
||||||
|
sink.put4(
|
||||||
|
0b01_1_111110_0000000_010101_00000_00000
|
||||||
|
| imm.enc() << 16
|
||||||
|
| machreg_to_vec(rn) << 5
|
||||||
|
| machreg_to_vec(rd.to_reg()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
FPUOpRI::Sli32(imm) => {
|
||||||
|
debug_assert_eq!(32, imm.lane_size_in_bits);
|
||||||
|
sink.put4(
|
||||||
|
0b0_0_1_011110_0000000_010101_00000_00000
|
||||||
|
| imm.enc() << 16
|
||||||
|
| machreg_to_vec(rn) << 5
|
||||||
|
| machreg_to_vec(rd.to_reg()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
},
|
||||||
&Inst::FpuRRRR {
|
&Inst::FpuRRRR {
|
||||||
fpu_op,
|
fpu_op,
|
||||||
rd,
|
rd,
|
||||||
|
|||||||
@@ -2400,6 +2400,46 @@ fn test_aarch64_binemit() {
|
|||||||
"fmadd d15, d30, d31, d1",
|
"fmadd d15, d30, d31, d1",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::FpuRRI {
|
||||||
|
fpu_op: FPUOpRI::UShr32(FPURightShiftImm::maybe_from_u8(32, 32).unwrap()),
|
||||||
|
rd: writable_vreg(2),
|
||||||
|
rn: vreg(5),
|
||||||
|
},
|
||||||
|
"A204202F",
|
||||||
|
"ushr v2.2s, v5.2s, #32",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::FpuRRI {
|
||||||
|
fpu_op: FPUOpRI::UShr64(FPURightShiftImm::maybe_from_u8(63, 64).unwrap()),
|
||||||
|
rd: writable_vreg(2),
|
||||||
|
rn: vreg(5),
|
||||||
|
},
|
||||||
|
"A204417F",
|
||||||
|
"ushr d2, d5, #63",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::FpuRRI {
|
||||||
|
fpu_op: FPUOpRI::Sli32(FPULeftShiftImm::maybe_from_u8(31, 32).unwrap()),
|
||||||
|
rd: writable_vreg(4),
|
||||||
|
rn: vreg(10),
|
||||||
|
},
|
||||||
|
"44553F2F",
|
||||||
|
"sli v4.2s, v10.2s, #31",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::FpuRRI {
|
||||||
|
fpu_op: FPUOpRI::Sli64(FPULeftShiftImm::maybe_from_u8(63, 64).unwrap()),
|
||||||
|
rd: writable_vreg(4),
|
||||||
|
rn: vreg(10),
|
||||||
|
},
|
||||||
|
"44557F7F",
|
||||||
|
"sli d4, d10, #63",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuToInt {
|
Inst::FpuToInt {
|
||||||
op: FpuToIntOp::F32ToU32,
|
op: FpuToIntOp::F32ToU32,
|
||||||
|
|||||||
@@ -106,6 +106,85 @@ impl SImm7Scaled {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
pub struct FPULeftShiftImm {
|
||||||
|
pub amount: u8,
|
||||||
|
pub lane_size_in_bits: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FPULeftShiftImm {
|
||||||
|
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
|
||||||
|
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
|
||||||
|
if amount < lane_size_in_bits {
|
||||||
|
Some(Self {
|
||||||
|
amount,
|
||||||
|
lane_size_in_bits,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn enc(&self) -> u32 {
|
||||||
|
debug_assert!(self.lane_size_in_bits.is_power_of_two());
|
||||||
|
debug_assert!(self.lane_size_in_bits > self.amount);
|
||||||
|
// The encoding of the immediate follows the table below,
|
||||||
|
// where xs encode the shift amount.
|
||||||
|
//
|
||||||
|
// | lane_size_in_bits | encoding |
|
||||||
|
// +------------------------------+
|
||||||
|
// | 8 | 0001xxx |
|
||||||
|
// | 16 | 001xxxx |
|
||||||
|
// | 32 | 01xxxxx |
|
||||||
|
// | 64 | 1xxxxxx |
|
||||||
|
//
|
||||||
|
// The highest one bit is represented by `lane_size_in_bits`. Since
|
||||||
|
// `lane_size_in_bits` is a power of 2 and `amount` is less
|
||||||
|
// than `lane_size_in_bits`, they can be ORed
|
||||||
|
// together to produce the encoded value.
|
||||||
|
u32::from(self.lane_size_in_bits | self.amount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
pub struct FPURightShiftImm {
|
||||||
|
pub amount: u8,
|
||||||
|
pub lane_size_in_bits: u8,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FPURightShiftImm {
|
||||||
|
pub fn maybe_from_u8(amount: u8, lane_size_in_bits: u8) -> Option<Self> {
|
||||||
|
debug_assert!(lane_size_in_bits == 32 || lane_size_in_bits == 64);
|
||||||
|
if amount > 0 && amount <= lane_size_in_bits {
|
||||||
|
Some(Self {
|
||||||
|
amount,
|
||||||
|
lane_size_in_bits,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn enc(&self) -> u32 {
|
||||||
|
debug_assert_ne!(0, self.amount);
|
||||||
|
// The encoding of the immediate follows the table below,
|
||||||
|
// where xs encodes the negated shift amount.
|
||||||
|
//
|
||||||
|
// | lane_size_in_bits | encoding |
|
||||||
|
// +------------------------------+
|
||||||
|
// | 8 | 0001xxx |
|
||||||
|
// | 16 | 001xxxx |
|
||||||
|
// | 32 | 01xxxxx |
|
||||||
|
// | 64 | 1xxxxxx |
|
||||||
|
//
|
||||||
|
// The shift amount is negated such that a shift amount
|
||||||
|
// of 1 (in 64-bit) is encoded as 0b111111 and a shift
|
||||||
|
// amount of 64 is encoded as 0b000000,
|
||||||
|
// in the bottom 6 bits.
|
||||||
|
u32::from((self.lane_size_in_bits * 2) - self.amount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// a 9-bit signed offset.
|
/// a 9-bit signed offset.
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug)]
|
||||||
pub struct SImm9 {
|
pub struct SImm9 {
|
||||||
@@ -576,6 +655,18 @@ impl ShowWithRRU for SImm7Scaled {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl ShowWithRRU for FPULeftShiftImm {
|
||||||
|
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||||
|
format!("#{}", self.amount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ShowWithRRU for FPURightShiftImm {
|
||||||
|
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||||
|
format!("#{}", self.amount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl ShowWithRRU for SImm9 {
|
impl ShowWithRRU for SImm9 {
|
||||||
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
|
||||||
format!("#{}", self.value)
|
format!("#{}", self.value)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
|
|
||||||
use crate::binemit::CodeOffset;
|
use crate::binemit::CodeOffset;
|
||||||
use crate::ir::types::{B1, B16, B32, B64, B8, F32, F64, FFLAGS, I16, I32, I64, I8, IFLAGS};
|
use crate::ir::types::{B1, B16, B32, B64, B8, F32, F32X2, F64, FFLAGS, I16, I32, I64, I8, IFLAGS};
|
||||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::{settings, CodegenError, CodegenResult};
|
use crate::{settings, CodegenError, CodegenResult};
|
||||||
@@ -124,6 +124,19 @@ pub enum FPUOp2 {
|
|||||||
Min64,
|
Min64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
pub enum FPUOpRI {
|
||||||
|
/// Unsigned right shift. Rd = Rn >> #imm
|
||||||
|
UShr32(FPURightShiftImm),
|
||||||
|
/// Unsigned right shift. Rd = Rn >> #imm
|
||||||
|
UShr64(FPURightShiftImm),
|
||||||
|
/// Shift left and insert. Rd = (Rd & ((1 << #imm) - 1)) | (Rn << #imm)
|
||||||
|
Sli32(FPULeftShiftImm),
|
||||||
|
/// Shift left and insert. Rd = (Rd & ((1 << #imm) - 1)) | (Rn << #imm)
|
||||||
|
Sli64(FPULeftShiftImm),
|
||||||
|
}
|
||||||
|
|
||||||
/// A floating-point unit (FPU) operation with three args.
|
/// A floating-point unit (FPU) operation with three args.
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||||
pub enum FPUOp3 {
|
pub enum FPUOp3 {
|
||||||
@@ -472,6 +485,12 @@ pub enum Inst {
|
|||||||
rm: Reg,
|
rm: Reg,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
FpuRRI {
|
||||||
|
fpu_op: FPUOpRI,
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
},
|
||||||
|
|
||||||
/// 3-op FPU instruction.
|
/// 3-op FPU instruction.
|
||||||
FpuRRRR {
|
FpuRRRR {
|
||||||
fpu_op: FPUOp3,
|
fpu_op: FPUOp3,
|
||||||
@@ -1034,6 +1053,13 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
collector.add_use(rm);
|
collector.add_use(rm);
|
||||||
}
|
}
|
||||||
|
&Inst::FpuRRI { fpu_op, rd, rn, .. } => {
|
||||||
|
match fpu_op {
|
||||||
|
FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.add_def(rd),
|
||||||
|
FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.add_mod(rd),
|
||||||
|
}
|
||||||
|
collector.add_use(rn);
|
||||||
|
}
|
||||||
&Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
|
&Inst::FpuRRRR { rd, rn, rm, ra, .. } => {
|
||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
@@ -1482,6 +1508,14 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RegUsageMapper) {
|
|||||||
map_use(mapper, rn);
|
map_use(mapper, rn);
|
||||||
map_use(mapper, rm);
|
map_use(mapper, rm);
|
||||||
}
|
}
|
||||||
|
&mut Inst::FpuRRI {
|
||||||
|
ref mut rd,
|
||||||
|
ref mut rn,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
map_def(mapper, rd);
|
||||||
|
map_use(mapper, rn);
|
||||||
|
}
|
||||||
&mut Inst::FpuRRRR {
|
&mut Inst::FpuRRRR {
|
||||||
ref mut rd,
|
ref mut rd,
|
||||||
ref mut rn,
|
ref mut rn,
|
||||||
@@ -2236,6 +2270,23 @@ impl ShowWithRRU for Inst {
|
|||||||
let rm = show_freg_sized(rm, mb_rru, size);
|
let rm = show_freg_sized(rm, mb_rru, size);
|
||||||
format!("{} {}, {}, {}", op, rd, rn, rm)
|
format!("{} {}, {}, {}", op, rd, rn, rm)
|
||||||
}
|
}
|
||||||
|
&Inst::FpuRRI { fpu_op, rd, rn } => {
|
||||||
|
let (op, imm, vector) = match fpu_op {
|
||||||
|
FPUOpRI::UShr32(imm) => ("ushr", imm.show_rru(mb_rru), true),
|
||||||
|
FPUOpRI::UShr64(imm) => ("ushr", imm.show_rru(mb_rru), false),
|
||||||
|
FPUOpRI::Sli32(imm) => ("sli", imm.show_rru(mb_rru), true),
|
||||||
|
FPUOpRI::Sli64(imm) => ("sli", imm.show_rru(mb_rru), false),
|
||||||
|
};
|
||||||
|
|
||||||
|
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
|
||||||
|
|reg, mb_rru| show_vreg_vector(reg, mb_rru, F32X2)
|
||||||
|
} else {
|
||||||
|
show_vreg_scalar
|
||||||
|
};
|
||||||
|
let rd = show_vreg_fn(rd.to_reg(), mb_rru);
|
||||||
|
let rn = show_vreg_fn(rn, mb_rru);
|
||||||
|
format!("{} {}, {}, {}", op, rd, rn, imm)
|
||||||
|
}
|
||||||
&Inst::FpuRRRR {
|
&Inst::FpuRRRR {
|
||||||
fpu_op,
|
fpu_op,
|
||||||
rd,
|
rd,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
//! AArch64 ISA definitions: registers.
|
//! AArch64 ISA definitions: registers.
|
||||||
|
|
||||||
|
use crate::ir::types::*;
|
||||||
use crate::isa::aarch64::inst::InstSize;
|
use crate::isa::aarch64::inst::InstSize;
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::settings;
|
use crate::settings;
|
||||||
@@ -307,3 +308,16 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
|
|||||||
}
|
}
|
||||||
s
|
s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Show a vector register.
|
||||||
|
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
|
||||||
|
assert_eq!(RegClass::V128, reg.get_class());
|
||||||
|
let mut s = reg.show_rru(mb_rru);
|
||||||
|
|
||||||
|
match ty {
|
||||||
|
F32X2 => s.push_str(".2s"),
|
||||||
|
_ => unimplemented!(),
|
||||||
|
}
|
||||||
|
|
||||||
|
s
|
||||||
|
}
|
||||||
|
|||||||
@@ -1460,54 +1460,38 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
Opcode::Fcopysign => {
|
Opcode::Fcopysign => {
|
||||||
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
|
// Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
|
||||||
//
|
//
|
||||||
// (64 bits for example, 32-bit sequence is analogous):
|
// This is a scalar Fcopysign.
|
||||||
|
// This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
|
||||||
//
|
//
|
||||||
// MOV Xtmp1, Dinput0
|
// mov vd, vn
|
||||||
// MOV Xtmp2, Dinput1
|
// ushr vtmp, vm, #63 / #31
|
||||||
// AND Xtmp2, 0x8000_0000_0000_0000
|
// sli vd, vtmp, #63 / #31
|
||||||
// BIC Xtmp1, 0x8000_0000_0000_0000
|
|
||||||
// ORR Xtmp1, Xtmp1, Xtmp2
|
|
||||||
// MOV Doutput, Xtmp1
|
|
||||||
|
|
||||||
let ty = ctx.output_ty(insn, 0);
|
let ty = ctx.output_ty(insn, 0);
|
||||||
let bits = ty_bits(ty);
|
let bits = ty_bits(ty) as u8;
|
||||||
assert!(bits == 32 || bits == 64);
|
assert!(bits == 32 || bits == 64);
|
||||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
let rd = output_to_reg(ctx, outputs[0]);
|
let rd = output_to_reg(ctx, outputs[0]);
|
||||||
let tmp1 = ctx.alloc_tmp(RegClass::I64, I64);
|
let tmp = ctx.alloc_tmp(RegClass::V128, F64);
|
||||||
let tmp2 = ctx.alloc_tmp(RegClass::I64, I64);
|
|
||||||
ctx.emit(Inst::MovFromVec64 { rd: tmp1, rn: rn });
|
// Copy LHS to rd.
|
||||||
ctx.emit(Inst::MovFromVec64 { rd: tmp2, rn: rm });
|
ctx.emit(Inst::FpuMove64 { rd, rn });
|
||||||
let imml = if bits == 32 {
|
|
||||||
ImmLogic::maybe_from_u64(0x8000_0000, I32).unwrap()
|
// Copy the sign bit to the lowest bit in tmp.
|
||||||
} else {
|
let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||||
ImmLogic::maybe_from_u64(0x8000_0000_0000_0000, I64).unwrap()
|
ctx.emit(Inst::FpuRRI {
|
||||||
};
|
fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
|
||||||
let alu_op = choose_32_64(ty, ALUOp::And32, ALUOp::And64);
|
rd: tmp,
|
||||||
ctx.emit(Inst::AluRRImmLogic {
|
rn: rm,
|
||||||
alu_op,
|
|
||||||
rd: tmp2,
|
|
||||||
rn: tmp2.to_reg(),
|
|
||||||
imml: imml.clone(),
|
|
||||||
});
|
});
|
||||||
let alu_op = choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64);
|
|
||||||
ctx.emit(Inst::AluRRImmLogic {
|
// Insert the bit from tmp into the sign bit of rd.
|
||||||
alu_op,
|
let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
|
||||||
rd: tmp1,
|
ctx.emit(Inst::FpuRRI {
|
||||||
rn: tmp1.to_reg(),
|
fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
|
||||||
imml,
|
|
||||||
});
|
|
||||||
let alu_op = choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64);
|
|
||||||
ctx.emit(Inst::AluRRR {
|
|
||||||
alu_op,
|
|
||||||
rd: tmp1,
|
|
||||||
rn: tmp1.to_reg(),
|
|
||||||
rm: tmp2.to_reg(),
|
|
||||||
});
|
|
||||||
ctx.emit(Inst::MovToVec64 {
|
|
||||||
rd,
|
rd,
|
||||||
rn: tmp1.to_reg(),
|
rn: tmp.to_reg(),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -397,12 +397,8 @@ block0(v0: f32, v1: f32):
|
|||||||
|
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: mov x0, v0.d[0]
|
; nextln: ushr v1.2s, v1.2s, #31
|
||||||
; nextln: mov x1, v1.d[0]
|
; nextln: sli v0.2s, v1.2s, #31
|
||||||
; nextln: and w1, w1, #2147483648
|
|
||||||
; nextln: bic w0, w0, #2147483648
|
|
||||||
; nextln: orr w0, w0, w1
|
|
||||||
; nextln: mov v0.d[0], x0
|
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
@@ -415,12 +411,8 @@ block0(v0: f64, v1: f64):
|
|||||||
|
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: mov x0, v0.d[0]
|
; nextln: ushr d1, d1, #63
|
||||||
; nextln: mov x1, v1.d[0]
|
; nextln: sli d0, d1, #63
|
||||||
; nextln: and x1, x1, #9223372036854775808
|
|
||||||
; nextln: bic x0, x0, #9223372036854775808
|
|
||||||
; nextln: orr x0, x0, x1
|
|
||||||
; nextln: mov v0.d[0], x0
|
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
; nextln: ldp fp, lr, [sp], #16
|
; nextln: ldp fp, lr, [sp], #16
|
||||||
; nextln: ret
|
; nextln: ret
|
||||||
|
|||||||
Reference in New Issue
Block a user