Cranelift: support 14-bit Type index with some bitpacking. (#4269)

* Cranelift: make `ir::Type` a `u16`.

* Cranelift: pack ValueData back into 64 bits.

After extending `Type` to a `u16`, `ValueData` became 12 bytes rather
than 8. This packs it back down to 8 bytes (64 bits) by stealing two
bits from the `Type` for the enum discriminant (leaving 14 bits for the
type itself).

Performance comparison (3-way between original (`ty-u8`), 16-bit `Type`
(`ty-u16`), and this PR (`ty-packed`)):

```
~/work/sightglass% target/release/sightglass-cli benchmark \
    -e ~/ty-u8.so -e ~/ty-u16.so -e ~/ty-packed.so \
    --iterations-per-process 10 --processes 2 \
    benchmarks-next/spidermonkey/benchmark.wasm

compilation
  benchmarks-next/spidermonkey/benchmark.wasm
    cycles
      [20654406874 21749213920.50 22958520306] /home/cfallin/ty-packed.so
      [22227738316 22584704883.90 22916433748] /home/cfallin/ty-u16.so
      [20659150490 21598675968.60 22588108428] /home/cfallin/ty-u8.so
    nanoseconds
      [5435333269 5723139427.25 6041072883] /home/cfallin/ty-packed.so
      [5848788229 5942729637.85 6030030341] /home/cfallin/ty-u16.so
      [5436002390 5683248226.10 5943626225] /home/cfallin/ty-u8.so
```

So, when compiling SpiderMonkey.wasm, making `Type` 16 bits regresses
performance by 4.5% (5.683s -> 5.943s), while this PR gets 14 bits for a 1.0%
cost (5.683s -> 5.723s). That's still not great, and we can likely do better,
but it's a start.

* Fix test failure: entities to/from u32 via `{from,to}_bits`, not `{from,to}_u32`.
This commit is contained in:
Chris Fallin
2022-07-05 14:51:02 -07:00
committed by GitHub
parent 52ad76ed7c
commit 00f357c028
5 changed files with 184 additions and 52 deletions

View File

@@ -71,7 +71,7 @@ impl ValueType {
}
/// Find the unique number associated with this type.
pub fn number(&self) -> u8 {
pub fn number(&self) -> u16 {
match *self {
ValueType::Lane(l) => l.number(),
ValueType::Reference(r) => r.number(),
@@ -173,7 +173,7 @@ impl LaneType {
}
/// Find the unique number associated with this lane type.
pub fn number(self) -> u8 {
pub fn number(self) -> u16 {
constants::LANE_BASE
+ match self {
LaneType::Bool(shared_types::Bool::B1) => 0,
@@ -355,11 +355,11 @@ impl VectorType {
///
/// Vector types are encoded with the lane type in the low 4 bits and
/// log2(lanes) in the high 4 bits, giving a range of 2-256 lanes.
pub fn number(&self) -> u8 {
pub fn number(&self) -> u16 {
let lanes_log_2: u32 = 63 - self.lane_count().leading_zeros();
let base_num = u32::from(self.base.number());
let num = (lanes_log_2 << 4) + base_num;
num as u8
num as u16
}
}
@@ -411,7 +411,7 @@ impl SpecialType {
}
/// Find the unique number associated with this special type.
pub fn number(self) -> u8 {
pub fn number(self) -> u16 {
match self {
SpecialType::Flag(shared_types::Flag::IFlags) => 1,
SpecialType::Flag(shared_types::Flag::FFlags) => 2,
@@ -484,7 +484,7 @@ impl ReferenceType {
}
/// Find the unique number associated with this reference type.
pub fn number(self) -> u8 {
pub fn number(self) -> u16 {
constants::REFERENCE_BASE
+ match self {
ReferenceType(shared_types::Reference::R32) => 0,