[AArch64] Merge 32- and 64-bit FPUOp1 (#4031)

Copyright (c) 2022, Arm Limited.
This commit is contained in:
Sam Parker
2022-04-14 22:00:48 +01:00
committed by GitHub
parent 9a02320dd4
commit dd442a4d2f
7 changed files with 237 additions and 206 deletions

View File

@@ -317,6 +317,7 @@
;; 1-op FPU instruction. ;; 1-op FPU instruction.
(FpuRR (FpuRR
(fpu_op FPUOp1) (fpu_op FPUOp1)
(size ScalarSize)
(rd WritableReg) (rd WritableReg)
(rn Reg)) (rn Reg))
@@ -940,12 +941,9 @@
;; A floating-point unit (FPU) operation with one arg. ;; A floating-point unit (FPU) operation with one arg.
(type FPUOp1 (type FPUOp1
(enum (enum
(Abs32) (Abs)
(Abs64) (Neg)
(Neg32) (Sqrt)
(Neg64)
(Sqrt32)
(Sqrt64)
(Cvt32To64) (Cvt32To64)
(Cvt64To32) (Cvt64To32)
)) ))

View File

@@ -1671,19 +1671,28 @@ impl MachInstEmit for Inst {
rn, rn,
)); ));
} }
&Inst::FpuRR { fpu_op, rd, rn } => { &Inst::FpuRR {
fpu_op,
size,
rd,
rn,
} => {
let rd = allocs.next_writable(rd); let rd = allocs.next_writable(rd);
let rn = allocs.next(rn); let rn = allocs.next(rn);
let top22 = match fpu_op { let top22 = match fpu_op {
FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000, FPUOp1::Abs => 0b000_11110_00_1_000001_10000,
FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000, FPUOp1::Neg => 0b000_11110_00_1_000010_10000,
FPUOp1::Neg32 => 0b000_11110_00_1_000010_10000, FPUOp1::Sqrt => 0b000_11110_00_1_000011_10000,
FPUOp1::Neg64 => 0b000_11110_01_1_000010_10000, FPUOp1::Cvt32To64 => {
FPUOp1::Sqrt32 => 0b000_11110_00_1_000011_10000, debug_assert_eq!(size, ScalarSize::Size32);
FPUOp1::Sqrt64 => 0b000_11110_01_1_000011_10000, 0b000_11110_00_1_000101_10000
FPUOp1::Cvt32To64 => 0b000_11110_00_1_000101_10000, }
FPUOp1::Cvt64To32 => 0b000_11110_01_1_000100_10000, FPUOp1::Cvt64To32 => {
debug_assert_eq!(size, ScalarSize::Size64);
0b000_11110_01_1_000100_10000
}
}; };
let top22 = top22 | size.ftype() << 12;
sink.put4(enc_fpurr(top22, rd, rn)); sink.put4(enc_fpurr(top22, rd, rn));
} }
&Inst::FpuRRR { &Inst::FpuRRR {

View File

@@ -5348,7 +5348,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRR { Inst::FpuRR {
fpu_op: FPUOp1::Abs32, fpu_op: FPUOp1::Abs,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
}, },
@@ -5358,7 +5359,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRR { Inst::FpuRR {
fpu_op: FPUOp1::Abs64, fpu_op: FPUOp1::Abs,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
}, },
@@ -5368,7 +5370,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRR { Inst::FpuRR {
fpu_op: FPUOp1::Neg32, fpu_op: FPUOp1::Neg,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
}, },
@@ -5378,7 +5381,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRR { Inst::FpuRR {
fpu_op: FPUOp1::Neg64, fpu_op: FPUOp1::Neg,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
}, },
@@ -5388,7 +5392,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRR { Inst::FpuRR {
fpu_op: FPUOp1::Sqrt32, fpu_op: FPUOp1::Sqrt,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
}, },
@@ -5398,7 +5403,8 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRR { Inst::FpuRR {
fpu_op: FPUOp1::Sqrt64, fpu_op: FPUOp1::Sqrt,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
}, },
@@ -5409,6 +5415,7 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRR { Inst::FpuRR {
fpu_op: FPUOp1::Cvt32To64, fpu_op: FPUOp1::Cvt32To64,
size: ScalarSize::Size32,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
}, },
@@ -5419,6 +5426,7 @@ fn test_aarch64_binemit() {
insns.push(( insns.push((
Inst::FpuRR { Inst::FpuRR {
fpu_op: FPUOp1::Cvt64To32, fpu_op: FPUOp1::Cvt64To32,
size: ScalarSize::Size64,
rd: writable_vreg(15), rd: writable_vreg(15),
rn: vreg(30), rn: vreg(30),
}, },

View File

@@ -1675,19 +1675,25 @@ impl Inst {
let rn = pretty_print_vreg_scalar(rn, size, allocs); let rn = pretty_print_vreg_scalar(rn, size, allocs);
format!("fmov {}, {}", rd, rn) format!("fmov {}, {}", rd, rn)
} }
&Inst::FpuRR { fpu_op, rd, rn } => { &Inst::FpuRR {
let (op, sizesrc, sizedest) = match fpu_op { fpu_op,
FPUOp1::Abs32 => ("fabs", ScalarSize::Size32, ScalarSize::Size32), size,
FPUOp1::Abs64 => ("fabs", ScalarSize::Size64, ScalarSize::Size64), rd,
FPUOp1::Neg32 => ("fneg", ScalarSize::Size32, ScalarSize::Size32), rn,
FPUOp1::Neg64 => ("fneg", ScalarSize::Size64, ScalarSize::Size64), } => {
FPUOp1::Sqrt32 => ("fsqrt", ScalarSize::Size32, ScalarSize::Size32), let op = match fpu_op {
FPUOp1::Sqrt64 => ("fsqrt", ScalarSize::Size64, ScalarSize::Size64), FPUOp1::Abs => "fabs",
FPUOp1::Cvt32To64 => ("fcvt", ScalarSize::Size32, ScalarSize::Size64), FPUOp1::Neg => "fneg",
FPUOp1::Cvt64To32 => ("fcvt", ScalarSize::Size64, ScalarSize::Size32), FPUOp1::Sqrt => "fsqrt",
FPUOp1::Cvt32To64 | FPUOp1::Cvt64To32 => "fcvt",
}; };
let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest, allocs); let dst_size = match fpu_op {
let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs); FPUOp1::Cvt32To64 => ScalarSize::Size64,
FPUOp1::Cvt64To32 => ScalarSize::Size32,
_ => size,
};
let rd = pretty_print_vreg_scalar(rd.to_reg(), dst_size, allocs);
let rn = pretty_print_vreg_scalar(rn, size, allocs);
format!("{} {}, {}", op, rd, rn) format!("{} {}, {}", op, rd, rn)
} }
&Inst::FpuRRR { &Inst::FpuRRR {

View File

@@ -1,4 +1,4 @@
src/clif.isle 443b34b797fc8ace src/clif.isle 443b34b797fc8ace
src/prelude.isle afd037c4d91c875c src/prelude.isle afd037c4d91c875c
src/isa/aarch64/inst.isle 54184fdac4e4ca23 src/isa/aarch64/inst.isle 950bb0092242218e
src/isa/aarch64/lower.isle 71c7e603b0e4bdef src/isa/aarch64/lower.isle 71c7e603b0e4bdef

File diff suppressed because it is too large Load Diff

View File

@@ -1694,27 +1694,39 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => { Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
let ty = ty.unwrap(); let ty = ty.unwrap();
let bits = ty_bits(ty);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
if !ty.is_vector() { if !ty.is_vector() {
let fpu_op = match (op, bits) { let fpu_op = match op {
(Opcode::Sqrt, 32) => FPUOp1::Sqrt32, Opcode::Sqrt => FPUOp1::Sqrt,
(Opcode::Sqrt, 64) => FPUOp1::Sqrt64, Opcode::Fneg => FPUOp1::Neg,
(Opcode::Fneg, 32) => FPUOp1::Neg32, Opcode::Fabs => FPUOp1::Abs,
(Opcode::Fneg, 64) => FPUOp1::Neg64, Opcode::Fpromote => {
(Opcode::Fabs, 32) => FPUOp1::Abs32, if ty != F64 {
(Opcode::Fabs, 64) => FPUOp1::Abs64,
(Opcode::Fpromote, 64) => FPUOp1::Cvt32To64,
(Opcode::Fdemote, 32) => FPUOp1::Cvt64To32,
_ => {
return Err(CodegenError::Unsupported(format!( return Err(CodegenError::Unsupported(format!(
"{}: Unsupported type: {:?}", "Fpromote: Unsupported type: {:?}",
op, ty ty
))) )));
} }
FPUOp1::Cvt32To64
}
Opcode::Fdemote => {
if ty != F32 {
return Err(CodegenError::Unsupported(format!(
"Fdemote: Unsupported type: {:?}",
ty
)));
}
FPUOp1::Cvt64To32
}
_ => unreachable!(),
}; };
ctx.emit(Inst::FpuRR { fpu_op, rd, rn }); ctx.emit(Inst::FpuRR {
fpu_op,
size: ScalarSize::from_ty(ctx.input_ty(insn, 0)),
rd,
rn,
});
} else { } else {
let op = match op { let op = match op {
Opcode::Fabs => VecMisc2::Fabs, Opcode::Fabs => VecMisc2::Fabs,