[AArch64] Merge 32- and 64-bit FPUOp2 (#4029)
Also remove the unused saturating add/sub opcodes (Sqadd64, Uqadd64, Sqsub64, Uqsub64) from FPUOp2. Copyright (c) 2022, Arm Limited.
This commit is contained in:
@@ -323,6 +323,7 @@
|
|||||||
;; 2-op FPU instruction.
|
;; 2-op FPU instruction.
|
||||||
(FpuRRR
|
(FpuRRR
|
||||||
(fpu_op FPUOp2)
|
(fpu_op FPUOp2)
|
||||||
|
(size ScalarSize)
|
||||||
(rd WritableReg)
|
(rd WritableReg)
|
||||||
(rn Reg)
|
(rn Reg)
|
||||||
(rm Reg))
|
(rm Reg))
|
||||||
@@ -952,26 +953,12 @@
|
|||||||
;; A floating-point unit (FPU) operation with two args.
|
;; A floating-point unit (FPU) operation with two args.
|
||||||
(type FPUOp2
|
(type FPUOp2
|
||||||
(enum
|
(enum
|
||||||
(Add32)
|
(Add)
|
||||||
(Add64)
|
(Sub)
|
||||||
(Sub32)
|
(Mul)
|
||||||
(Sub64)
|
(Div)
|
||||||
(Mul32)
|
(Max)
|
||||||
(Mul64)
|
(Min)
|
||||||
(Div32)
|
|
||||||
(Div64)
|
|
||||||
(Max32)
|
|
||||||
(Max64)
|
|
||||||
(Min32)
|
|
||||||
(Min64)
|
|
||||||
;; Signed saturating add
|
|
||||||
(Sqadd64)
|
|
||||||
;; Unsigned saturating add
|
|
||||||
(Uqadd64)
|
|
||||||
;; Signed saturating subtract
|
|
||||||
(Sqsub64)
|
|
||||||
;; Unsigned saturating subtract
|
|
||||||
(Uqsub64)
|
|
||||||
))
|
))
|
||||||
|
|
||||||
;; A floating-point unit (FPU) operation with three args.
|
;; A floating-point unit (FPU) operation with three args.
|
||||||
|
|||||||
@@ -1686,28 +1686,25 @@ impl MachInstEmit for Inst {
|
|||||||
};
|
};
|
||||||
sink.put4(enc_fpurr(top22, rd, rn));
|
sink.put4(enc_fpurr(top22, rd, rn));
|
||||||
}
|
}
|
||||||
&Inst::FpuRRR { fpu_op, rd, rn, rm } => {
|
&Inst::FpuRRR {
|
||||||
|
fpu_op,
|
||||||
|
size,
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
} => {
|
||||||
let rd = allocs.next_writable(rd);
|
let rd = allocs.next_writable(rd);
|
||||||
let rn = allocs.next(rn);
|
let rn = allocs.next(rn);
|
||||||
let rm = allocs.next(rm);
|
let rm = allocs.next(rm);
|
||||||
let top22 = match fpu_op {
|
let top22 = match fpu_op {
|
||||||
FPUOp2::Add32 => 0b000_11110_00_1_00000_001010,
|
FPUOp2::Add => 0b000_11110_00_1_00000_001010,
|
||||||
FPUOp2::Add64 => 0b000_11110_01_1_00000_001010,
|
FPUOp2::Sub => 0b000_11110_00_1_00000_001110,
|
||||||
FPUOp2::Sub32 => 0b000_11110_00_1_00000_001110,
|
FPUOp2::Mul => 0b000_11110_00_1_00000_000010,
|
||||||
FPUOp2::Sub64 => 0b000_11110_01_1_00000_001110,
|
FPUOp2::Div => 0b000_11110_00_1_00000_000110,
|
||||||
FPUOp2::Mul32 => 0b000_11110_00_1_00000_000010,
|
FPUOp2::Max => 0b000_11110_00_1_00000_010010,
|
||||||
FPUOp2::Mul64 => 0b000_11110_01_1_00000_000010,
|
FPUOp2::Min => 0b000_11110_00_1_00000_010110,
|
||||||
FPUOp2::Div32 => 0b000_11110_00_1_00000_000110,
|
|
||||||
FPUOp2::Div64 => 0b000_11110_01_1_00000_000110,
|
|
||||||
FPUOp2::Max32 => 0b000_11110_00_1_00000_010010,
|
|
||||||
FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
|
|
||||||
FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
|
|
||||||
FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
|
|
||||||
FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
|
|
||||||
FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
|
|
||||||
FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
|
|
||||||
FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
|
|
||||||
};
|
};
|
||||||
|
let top22 = top22 | size.ftype() << 12;
|
||||||
sink.put4(enc_fpurrr(top22, rd, rn, rm));
|
sink.put4(enc_fpurrr(top22, rd, rn, rm));
|
||||||
}
|
}
|
||||||
&Inst::FpuRRI { fpu_op, rd, rn } => {
|
&Inst::FpuRRI { fpu_op, rd, rn } => {
|
||||||
|
|||||||
@@ -5428,7 +5428,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Add32,
|
fpu_op: FPUOp2::Add,
|
||||||
|
size: ScalarSize::Size32,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5439,7 +5440,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Add64,
|
fpu_op: FPUOp2::Add,
|
||||||
|
size: ScalarSize::Size64,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5450,7 +5452,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Sub32,
|
fpu_op: FPUOp2::Sub,
|
||||||
|
size: ScalarSize::Size32,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5461,7 +5464,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Sub64,
|
fpu_op: FPUOp2::Sub,
|
||||||
|
size: ScalarSize::Size64,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5472,7 +5476,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Mul32,
|
fpu_op: FPUOp2::Mul,
|
||||||
|
size: ScalarSize::Size32,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5483,7 +5488,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Mul64,
|
fpu_op: FPUOp2::Mul,
|
||||||
|
size: ScalarSize::Size64,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5494,7 +5500,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Div32,
|
fpu_op: FPUOp2::Div,
|
||||||
|
size: ScalarSize::Size32,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5505,7 +5512,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Div64,
|
fpu_op: FPUOp2::Div,
|
||||||
|
size: ScalarSize::Size64,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5516,7 +5524,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Max32,
|
fpu_op: FPUOp2::Max,
|
||||||
|
size: ScalarSize::Size32,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5527,7 +5536,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Max64,
|
fpu_op: FPUOp2::Max,
|
||||||
|
size: ScalarSize::Size64,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5538,7 +5548,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Min32,
|
fpu_op: FPUOp2::Min,
|
||||||
|
size: ScalarSize::Size32,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5549,7 +5560,8 @@ fn test_aarch64_binemit() {
|
|||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRR {
|
Inst::FpuRRR {
|
||||||
fpu_op: FPUOp2::Min64,
|
fpu_op: FPUOp2::Min,
|
||||||
|
size: ScalarSize::Size64,
|
||||||
rd: writable_vreg(15),
|
rd: writable_vreg(15),
|
||||||
rn: vreg(30),
|
rn: vreg(30),
|
||||||
rm: vreg(31),
|
rm: vreg(31),
|
||||||
@@ -5558,50 +5570,6 @@ fn test_aarch64_binemit() {
|
|||||||
"fmin d15, d30, d31",
|
"fmin d15, d30, d31",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::FpuRRR {
|
|
||||||
fpu_op: FPUOp2::Uqadd64,
|
|
||||||
rd: writable_vreg(21),
|
|
||||||
rn: vreg(22),
|
|
||||||
rm: vreg(23),
|
|
||||||
},
|
|
||||||
"D50EF77E",
|
|
||||||
"uqadd d21, d22, d23",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::FpuRRR {
|
|
||||||
fpu_op: FPUOp2::Sqadd64,
|
|
||||||
rd: writable_vreg(21),
|
|
||||||
rn: vreg(22),
|
|
||||||
rm: vreg(23),
|
|
||||||
},
|
|
||||||
"D50EF75E",
|
|
||||||
"sqadd d21, d22, d23",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::FpuRRR {
|
|
||||||
fpu_op: FPUOp2::Uqsub64,
|
|
||||||
rd: writable_vreg(21),
|
|
||||||
rn: vreg(22),
|
|
||||||
rm: vreg(23),
|
|
||||||
},
|
|
||||||
"D52EF77E",
|
|
||||||
"uqsub d21, d22, d23",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
|
||||||
Inst::FpuRRR {
|
|
||||||
fpu_op: FPUOp2::Sqsub64,
|
|
||||||
rd: writable_vreg(21),
|
|
||||||
rn: vreg(22),
|
|
||||||
rm: vreg(23),
|
|
||||||
},
|
|
||||||
"D52EF75E",
|
|
||||||
"sqsub d21, d22, d23",
|
|
||||||
));
|
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::FpuRRRR {
|
Inst::FpuRRRR {
|
||||||
fpu_op: FPUOp3::MAdd32,
|
fpu_op: FPUOp3::MAdd32,
|
||||||
|
|||||||
@@ -1690,24 +1690,20 @@ impl Inst {
|
|||||||
let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs);
|
let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs);
|
||||||
format!("{} {}, {}", op, rd, rn)
|
format!("{} {}, {}", op, rd, rn)
|
||||||
}
|
}
|
||||||
&Inst::FpuRRR { fpu_op, rd, rn, rm } => {
|
&Inst::FpuRRR {
|
||||||
let (op, size) = match fpu_op {
|
fpu_op,
|
||||||
FPUOp2::Add32 => ("fadd", ScalarSize::Size32),
|
size,
|
||||||
FPUOp2::Add64 => ("fadd", ScalarSize::Size64),
|
rd,
|
||||||
FPUOp2::Sub32 => ("fsub", ScalarSize::Size32),
|
rn,
|
||||||
FPUOp2::Sub64 => ("fsub", ScalarSize::Size64),
|
rm,
|
||||||
FPUOp2::Mul32 => ("fmul", ScalarSize::Size32),
|
} => {
|
||||||
FPUOp2::Mul64 => ("fmul", ScalarSize::Size64),
|
let op = match fpu_op {
|
||||||
FPUOp2::Div32 => ("fdiv", ScalarSize::Size32),
|
FPUOp2::Add => "fadd",
|
||||||
FPUOp2::Div64 => ("fdiv", ScalarSize::Size64),
|
FPUOp2::Sub => "fsub",
|
||||||
FPUOp2::Max32 => ("fmax", ScalarSize::Size32),
|
FPUOp2::Mul => "fmul",
|
||||||
FPUOp2::Max64 => ("fmax", ScalarSize::Size64),
|
FPUOp2::Div => "fdiv",
|
||||||
FPUOp2::Min32 => ("fmin", ScalarSize::Size32),
|
FPUOp2::Max => "fmax",
|
||||||
FPUOp2::Min64 => ("fmin", ScalarSize::Size64),
|
FPUOp2::Min => "fmin",
|
||||||
FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
|
|
||||||
FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
|
|
||||||
FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
|
|
||||||
FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
|
|
||||||
};
|
};
|
||||||
let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs);
|
let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs);
|
||||||
let rn = pretty_print_vreg_scalar(rn, size, allocs);
|
let rn = pretty_print_vreg_scalar(rn, size, allocs);
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
src/clif.isle 443b34b797fc8ace
|
src/clif.isle 443b34b797fc8ace
|
||||||
src/prelude.isle afd037c4d91c875c
|
src/prelude.isle afd037c4d91c875c
|
||||||
src/isa/aarch64/inst.isle a44074e06f955750
|
src/isa/aarch64/inst.isle 54184fdac4e4ca23
|
||||||
src/isa/aarch64/lower.isle 71c7e603b0e4bdef
|
src/isa/aarch64/lower.isle 71c7e603b0e4bdef
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1591,32 +1591,26 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
|
|
||||||
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
|
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
|
||||||
let ty = ty.unwrap();
|
let ty = ty.unwrap();
|
||||||
let bits = ty_bits(ty);
|
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||||
if !ty.is_vector() {
|
if !ty.is_vector() {
|
||||||
let fpu_op = match (op, bits) {
|
let fpu_op = match op {
|
||||||
(Opcode::Fadd, 32) => FPUOp2::Add32,
|
Opcode::Fadd => FPUOp2::Add,
|
||||||
(Opcode::Fadd, 64) => FPUOp2::Add64,
|
Opcode::Fsub => FPUOp2::Sub,
|
||||||
(Opcode::Fsub, 32) => FPUOp2::Sub32,
|
Opcode::Fmul => FPUOp2::Mul,
|
||||||
(Opcode::Fsub, 64) => FPUOp2::Sub64,
|
Opcode::Fdiv => FPUOp2::Div,
|
||||||
(Opcode::Fmul, 32) => FPUOp2::Mul32,
|
Opcode::Fmin => FPUOp2::Min,
|
||||||
(Opcode::Fmul, 64) => FPUOp2::Mul64,
|
Opcode::Fmax => FPUOp2::Max,
|
||||||
(Opcode::Fdiv, 32) => FPUOp2::Div32,
|
_ => unreachable!(),
|
||||||
(Opcode::Fdiv, 64) => FPUOp2::Div64,
|
|
||||||
(Opcode::Fmin, 32) => FPUOp2::Min32,
|
|
||||||
(Opcode::Fmin, 64) => FPUOp2::Min64,
|
|
||||||
(Opcode::Fmax, 32) => FPUOp2::Max32,
|
|
||||||
(Opcode::Fmax, 64) => FPUOp2::Max64,
|
|
||||||
_ => {
|
|
||||||
return Err(CodegenError::Unsupported(format!(
|
|
||||||
"{}: Unsupported type: {:?}",
|
|
||||||
op, ty
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
ctx.emit(Inst::FpuRRR { fpu_op, rd, rn, rm });
|
ctx.emit(Inst::FpuRRR {
|
||||||
|
fpu_op,
|
||||||
|
size: ScalarSize::from_ty(ty),
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
let alu_op = match op {
|
let alu_op = match op {
|
||||||
Opcode::Fadd => VecALUOp::Fadd,
|
Opcode::Fadd => VecALUOp::Fadd,
|
||||||
@@ -2149,7 +2143,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
lower_constant_f64(ctx, rtmp1, max);
|
lower_constant_f64(ctx, rtmp1, max);
|
||||||
}
|
}
|
||||||
ctx.emit(Inst::FpuRRR {
|
ctx.emit(Inst::FpuRRR {
|
||||||
fpu_op: choose_32_64(in_ty, FPUOp2::Min32, FPUOp2::Min64),
|
fpu_op: FPUOp2::Min,
|
||||||
|
size: ScalarSize::from_ty(in_ty),
|
||||||
rd: rtmp2,
|
rd: rtmp2,
|
||||||
rn,
|
rn,
|
||||||
rm: rtmp1.to_reg(),
|
rm: rtmp1.to_reg(),
|
||||||
@@ -2160,7 +2155,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
lower_constant_f64(ctx, rtmp1, min);
|
lower_constant_f64(ctx, rtmp1, min);
|
||||||
}
|
}
|
||||||
ctx.emit(Inst::FpuRRR {
|
ctx.emit(Inst::FpuRRR {
|
||||||
fpu_op: choose_32_64(in_ty, FPUOp2::Max32, FPUOp2::Max64),
|
fpu_op: FPUOp2::Max,
|
||||||
|
size: ScalarSize::from_ty(in_ty),
|
||||||
rd: rtmp2,
|
rd: rtmp2,
|
||||||
rn: rtmp2.to_reg(),
|
rn: rtmp2.to_reg(),
|
||||||
rm: rtmp1.to_reg(),
|
rm: rtmp1.to_reg(),
|
||||||
|
|||||||
Reference in New Issue
Block a user