Aarch64 codegen quality: handle add-negative-imm as subtract.
We often see patterns like:
```
mov w2, #0xffff_ffff // uses ORR with logical immediate form
add w0, w1, w2
```
which is just `w0 := w1 - 1`. It would be much better to recognize when
the negation (two's-complement additive inverse) of an immediate fits in
a 12-bit immediate field even though the immediate itself does not, and
to flip add to subtract (and vice versa), so that we can instead generate:
```
sub w0, w1, #1
```
We see this pattern in e.g. `bz2`, where this commit makes the following
difference (counting instructions with `perf stat`; the wasmtime cache was
filled by a first run, then the program was run again to measure only run time):
pre:
```
992.762250 task-clock (msec) # 0.998 CPUs utilized
109 context-switches # 0.110 K/sec
0 cpu-migrations # 0.000 K/sec
5,035 page-faults # 0.005 M/sec
3,224,119,134 cycles # 3.248 GHz
4,000,521,171 instructions # 1.24 insn per cycle
<not supported> branches
27,573,755 branch-misses
0.995072322 seconds time elapsed
```
post:
```
993.853850 task-clock (msec) # 0.998 CPUs utilized
123 context-switches # 0.124 K/sec
1 cpu-migrations # 0.001 K/sec
5,072 page-faults # 0.005 M/sec
3,201,278,337 cycles # 3.221 GHz
3,917,061,340 instructions # 1.22 insn per cycle
<not supported> branches
28,410,633 branch-misses
0.996008047 seconds time elapsed
```
In other words, a 2.1% reduction in instruction count on `bz2`.
This commit is contained in:
@@ -424,6 +424,35 @@ pub(crate) fn put_input_in_rse_imm12<C: LowerCtx<I = Inst>>(
|
||||
ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
|
||||
}
|
||||
|
||||
/// Like `put_input_in_rse_imm12` above, except is allowed to negate the
|
||||
/// argument (assuming a two's-complement representation with the given bit
|
||||
/// width) if this allows use of 12-bit immediate. Used to flip `add`s with
|
||||
/// negative immediates to `sub`s (and vice-versa).
|
||||
pub(crate) fn put_input_in_rse_imm12_maybe_negated<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
twos_complement_bits: usize,
|
||||
narrow_mode: NarrowValueMode,
|
||||
) -> (ResultRSEImm12, bool) {
|
||||
assert!(twos_complement_bits <= 64);
|
||||
if let Some(imm_value) = input_to_const(ctx, input) {
|
||||
if let Some(i) = Imm12::maybe_from_u64(imm_value) {
|
||||
return (ResultRSEImm12::Imm12(i), false);
|
||||
}
|
||||
let sign_extended =
|
||||
((imm_value as i64) << (64 - twos_complement_bits)) >> (64 - twos_complement_bits);
|
||||
let inverted = sign_extended.wrapping_neg();
|
||||
if let Some(i) = Imm12::maybe_from_u64(inverted as u64) {
|
||||
return (ResultRSEImm12::Imm12(i), true);
|
||||
}
|
||||
}
|
||||
|
||||
(
|
||||
ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode)),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn put_input_in_rs_immlogic<C: LowerCtx<I = Inst>>(
|
||||
ctx: &mut C,
|
||||
input: InsnInput,
|
||||
|
||||
@@ -60,8 +60,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
|
||||
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
|
||||
ctx,
|
||||
inputs[1],
|
||||
ty_bits(ty),
|
||||
NarrowValueMode::None,
|
||||
);
|
||||
let alu_op = if !negated {
|
||||
choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
|
||||
} else {
|
||||
choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
|
||||
};
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
@@ -79,8 +88,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
|
||||
let (rm, negated) = put_input_in_rse_imm12_maybe_negated(
|
||||
ctx,
|
||||
inputs[1],
|
||||
ty_bits(ty),
|
||||
NarrowValueMode::None,
|
||||
);
|
||||
let alu_op = if !negated {
|
||||
choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64)
|
||||
} else {
|
||||
choose_32_64(ty, ALUOp::Add32, ALUOp::Add64)
|
||||
};
|
||||
ctx.emit(alu_inst_imm12(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
|
||||
Reference in New Issue
Block a user