CL/aarch64: implement the wasm SIMD i32x4.dot_i16x8_s instruction

This patch implements, for aarch64, the following wasm SIMD extension:

  i32x4.dot_i16x8_s instruction
  https://github.com/WebAssembly/simd/pull/127

It also updates dependencies as follows, in order that the new instruction can
be parsed, decoded, etc:

  wat          to  1.0.27
  wast         to  26.0.1
  wasmparser   to  0.65.0
  wasmprinter  to  0.2.12

The changes are straightforward:

* new CLIF instruction `widening_pairwise_dot_product_s`

* translation from wasm into `widening_pairwise_dot_product_s`

* new AArch64 instructions `smull`, `smull2` (part of the `VecRRR` group)

* translation from `widening_pairwise_dot_product_s` to `smull ; smull2 ; addp`

There is no testcase in this commit, because the testcases live in a separate
repo.  The implementation has been tested, nevertheless.
This commit is contained in:
Julian Seward
2020-10-27 15:04:32 +01:00
committed by julian-seward1
parent 54a97f784e
commit 5a5fb11979
26 changed files with 228 additions and 54 deletions

52
Cargo.lock generated
View File

@@ -349,7 +349,7 @@ dependencies = [
"souper-ir",
"target-lexicon",
"thiserror",
"wast 25.0.0",
"wast 26.0.1",
]
[[package]]
@@ -549,7 +549,7 @@ dependencies = [
"smallvec",
"target-lexicon",
"thiserror",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
"wat",
]
@@ -1062,7 +1062,7 @@ dependencies = [
"staticvec",
"thiserror",
"typemap",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
"wat",
]
@@ -1233,7 +1233,7 @@ dependencies = [
"peepmatic-test-operator",
"peepmatic-traits",
"serde",
"wast 25.0.0",
"wast 26.0.1",
"z3",
]
@@ -1261,7 +1261,7 @@ dependencies = [
"peepmatic-traits",
"rand 0.7.3",
"serde",
"wast 25.0.0",
"wast 26.0.1",
]
[[package]]
@@ -1286,7 +1286,7 @@ dependencies = [
"serde",
"serde_test",
"thiserror",
"wast 25.0.0",
"wast 26.0.1",
]
[[package]]
@@ -1298,7 +1298,7 @@ dependencies = [
"peepmatic",
"peepmatic-test-operator",
"souper-ir",
"wast 25.0.0",
"wast 26.0.1",
]
[[package]]
@@ -1319,7 +1319,7 @@ version = "0.67.0"
dependencies = [
"peepmatic-traits",
"serde",
"wast 25.0.0",
"wast 26.0.1",
]
[[package]]
@@ -2290,18 +2290,18 @@ checksum = "32fddd575d477c6e9702484139cf9f23dcd554b06d185ed0f56c857dd3a47aa6"
[[package]]
name = "wasmparser"
version = "0.63.0"
version = "0.65.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57da5d7300428d75d8b3cdfb736e41ee6af8926d69c1de2f201a1a22f234b7b5"
checksum = "87cc2fe6350834b4e528ba0901e7aa405d78b89dc1fa3145359eb4de0e323fcf"
[[package]]
name = "wasmprinter"
version = "0.2.10"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd1fea2e86fb107de33db6538db99f73e8d058f6fda2bbd8cceafff847dd9266"
checksum = "0fe87e79ad307dc73cb7caca2b57d049d2905c7db1317a9c21171e69aa9f93d3"
dependencies = [
"anyhow",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
]
[[package]]
@@ -2321,7 +2321,7 @@ dependencies = [
"smallvec",
"target-lexicon",
"tempfile",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
"wasmtime-cache",
"wasmtime-environ",
"wasmtime-jit",
@@ -2399,7 +2399,7 @@ dependencies = [
"test-programs",
"tracing-subscriber",
"wasi-common",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
"wasmtime",
"wasmtime-cache",
"wasmtime-debug",
@@ -2434,7 +2434,7 @@ dependencies = [
"object 0.21.1",
"target-lexicon",
"thiserror",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
"wasmtime-environ",
]
@@ -2453,7 +2453,7 @@ dependencies = [
"more-asserts",
"serde",
"thiserror",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
]
[[package]]
@@ -2481,7 +2481,7 @@ dependencies = [
"log",
"rayon",
"wasm-smith",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
"wasmprinter",
"wasmtime",
"wasmtime-wast",
@@ -2508,7 +2508,7 @@ dependencies = [
"serde",
"target-lexicon",
"thiserror",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
"wasmtime-cranelift",
"wasmtime-debug",
"wasmtime-environ",
@@ -2525,7 +2525,7 @@ version = "0.20.0"
dependencies = [
"cranelift-codegen",
"lightbeam",
"wasmparser 0.63.0",
"wasmparser 0.65.0",
"wasmtime-environ",
]
@@ -2617,7 +2617,7 @@ version = "0.20.0"
dependencies = [
"anyhow",
"wasmtime",
"wast 25.0.0",
"wast 26.0.1",
]
[[package]]
@@ -2652,20 +2652,20 @@ dependencies = [
[[package]]
name = "wast"
version = "25.0.0"
version = "26.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8c67a4386e4efe10563552848d8c6a4b7f941e69924a935495645c3f52b32d0"
checksum = "b3f174eed73e885ede6c8fcc3fbea8c3757afa521840676496cde56bb742ddab"
dependencies = [
"leb128",
]
[[package]]
name = "wat"
version = "1.0.26"
version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4766d466249e23279e92c52033429eb91141c5efea1c4478138fa6f6ef4efe3e"
checksum = "26b2dccbce4d0e14875091846e110a2369267b18ddd0d6423479b88dad914d71"
dependencies = [
"wast 25.0.0",
"wast 26.0.1",
]
[[package]]

View File

@@ -37,12 +37,12 @@ anyhow = "1.0.19"
target-lexicon = { version = "0.11.0", default-features = false }
pretty_env_logger = "0.4.0"
file-per-thread-logger = "0.1.1"
wat = "1.0.26"
wat = "1.0.27"
libc = "0.2.60"
log = "0.4.8"
rayon = "1.2.1"
humantime = "1.3.0"
wasmparser = "0.63"
wasmparser = "0.65"
[dev-dependencies]
env_logger = "0.7.1"

View File

@@ -30,7 +30,7 @@ peepmatic-traits = { path = "../peepmatic/crates/traits", optional = true, versi
peepmatic-runtime = { path = "../peepmatic/crates/runtime", optional = true, version = "0.67.0" }
regalloc = { version = "0.0.31" }
souper-ir = { version = "1", optional = true }
wast = { version = "25.0.0", optional = true }
wast = { version = "26.0.1", optional = true }
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be

View File

@@ -4078,6 +4078,41 @@ pub(crate) fn define(
.operands_out(vec![a]),
);
// Type variable pinned to a single vector shape: integer lanes of exactly 16 bits
// (`ints(16..16)`), exactly 8 lanes (`simd_lanes(8..8)`), scalars excluded.
let I16x8 = &TypeVar::new(
"I16x8",
"A SIMD vector type containing 8 integer lanes each 16 bits wide.",
TypeSetBuilder::new()
.ints(16..16)
.simd_lanes(8..8)
.includes_scalars(false)
.build(),
);
// Both inputs use the I16x8 type variable; the result type is derived from it
// with `merge_lanes()`.
// NOTE(review): presumably `merge_lanes()` yields lanes twice as wide and half as
// many of them (I32x4 here), matching the instruction description below -- confirm.
let x = &Operand::new("x", I16x8);
let y = &Operand::new("y", I16x8);
let a = &Operand::new("a", &I16x8.merge_lanes());
// Register the new binary-format instruction. Note that the description's example
// uses 4-lane inputs purely for illustration; the operand type above has 8 lanes.
ig.push(
Inst::new(
"widening_pairwise_dot_product_s",
r#"
Takes corresponding elements in `x` and `y`, performs a sign-extending length-doubling
multiplication on them, then adds adjacent pairs of elements to form the result. For
example, if the input vectors are `[x3, x2, x1, x0]` and `[y3, y2, y1, y0]`, it produces
the vector `[r1, r0]`, where `r1 = sx(x3) * sx(y3) + sx(x2) * sx(y2)` and
`r0 = sx(x1) * sx(y1) + sx(x0) * sx(y0)`, and `sx(n)` sign-extends `n` to twice its width.
This will double the lane width and halve the number of lanes. So the resulting
vector has the same number of bits as `x` and `y` do (individually).
See https://github.com/WebAssembly/simd/pull/127 for background info.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
let IntTo = &TypeVar::new(
"IntTo",
"A larger integer type with the same number of lanes",

View File

@@ -677,6 +677,9 @@ impl VectorSize {
}
}
/// Produces a `VectorSize` with lanes twice as wide. Note that if the resulting
/// size would exceed 128 bits, then the number of lanes is also halved, so as to
/// ensure that the result size is at most 128 bits.
pub fn widen(&self) -> VectorSize {
match self {
VectorSize::Size8x8 => VectorSize::Size16x8,
@@ -689,6 +692,7 @@ impl VectorSize {
}
}
/// Produces a `VectorSize` that has the same lane width, but half as many lanes.
pub fn halve(&self) -> VectorSize {
match self {
VectorSize::Size8x16 => VectorSize::Size8x8,

View File

@@ -1950,11 +1950,13 @@ impl MachInstEmit for Inst {
(0b001_01110_00_1 | enc_size << 1, 0b100000)
}
VecALUOp::Zip1 => (0b01001110_00_0 | enc_size << 1, 0b001110),
VecALUOp::Smull => (0b000_01110_00_1 | enc_size << 1, 0b110000),
VecALUOp::Smull2 => (0b010_01110_00_1 | enc_size << 1, 0b110000),
};
let top11 = if is_float {
top11 | (q << 9) | enc_float_size << 1
} else {
top11 | (q << 9)
let top11 = match alu_op {
VecALUOp::Smull | VecALUOp::Smull2 => top11,
_ if is_float => top11 | (q << 9) | enc_float_size << 1,
_ => top11 | (q << 9),
};
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
}

View File

@@ -3243,6 +3243,78 @@ fn test_aarch64_binemit() {
"zip1 v9.2d, v20.2d, v17.2d",
));
// Test entries for the signed multiply-long instructions. Each entry pairs an
// instruction with a hex string and a textual form -- presumably the expected
// encoding bytes and the expected pretty-printed output (confirm against the
// loop that consumes `insns`).
//
// In every case below the `size` field names the full-width *source* arrangement.
// The printed destination is the widened arrangement, and for `Smull` the printed
// sources are the halved arrangement (8b/4h/2s), since it reads the low halves.

// smull, 8-bit sources -> 16-bit results.
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Smull,
rd: writable_vreg(16),
rn: vreg(12),
rm: vreg(1),
size: VectorSize::Size8x16,
},
"90C1210E",
"smull v16.8h, v12.8b, v1.8b",
));
// smull, 16-bit sources -> 32-bit results.
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Smull,
rd: writable_vreg(2),
rn: vreg(13),
rm: vreg(6),
size: VectorSize::Size16x8,
},
"A2C1660E",
"smull v2.4s, v13.4h, v6.4h",
));
// smull, 32-bit sources -> 64-bit results.
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Smull,
rd: writable_vreg(8),
rn: vreg(12),
rm: vreg(14),
size: VectorSize::Size32x4,
},
"88C1AE0E",
"smull v8.2d, v12.2s, v14.2s",
));
// smull2 (high halves), 8-bit sources -> 16-bit results. Same registers as the
// first smull case; only the final hex byte differs (0E vs 4E).
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Smull2,
rd: writable_vreg(16),
rn: vreg(12),
rm: vreg(1),
size: VectorSize::Size8x16,
},
"90C1214E",
"smull2 v16.8h, v12.16b, v1.16b",
));
// smull2, 16-bit sources -> 32-bit results.
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Smull2,
rd: writable_vreg(2),
rn: vreg(13),
rm: vreg(6),
size: VectorSize::Size16x8,
},
"A2C1664E",
"smull2 v2.4s, v13.8h, v6.8h",
));
// smull2, 32-bit sources -> 64-bit results.
insns.push((
Inst::VecRRR {
alu_op: VecALUOp::Smull2,
rd: writable_vreg(8),
rn: vreg(12),
rm: vreg(14),
size: VectorSize::Size32x4,
},
"88C1AE4E",
"smull2 v8.2d, v12.4s, v14.4s",
));
insns.push((
Inst::VecMisc {
op: VecMisc2::Not,

View File

@@ -291,6 +291,10 @@ pub enum VecALUOp {
Umlal,
/// Zip vectors (primary) [meaning, high halves]
Zip1,
/// Signed multiply long (low halves)
Smull,
/// Signed multiply long (high halves)
Smull2,
}
/// A Vector miscellaneous operation with two registers.
@@ -3546,15 +3550,21 @@ impl Inst {
VecALUOp::Addp => ("addp", size),
VecALUOp::Umlal => ("umlal", size),
VecALUOp::Zip1 => ("zip1", size),
VecALUOp::Smull => ("smull", size),
VecALUOp::Smull2 => ("smull2", size),
};
let rd_size = if alu_op == VecALUOp::Umlal {
size.widen()
} else {
size
let rd_size = match alu_op {
VecALUOp::Umlal | VecALUOp::Smull | VecALUOp::Smull2 => size.widen(),
_ => size
};
let rn_size = match alu_op {
VecALUOp::Smull => size.halve(),
_ => size
};
let rm_size = rn_size;
let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
let rn = show_vreg_vector(rn, mb_rru, size);
let rm = show_vreg_vector(rm, mb_rru, size);
let rn = show_vreg_vector(rn, mb_rru, rn_size);
let rm = show_vreg_vector(rm, mb_rru, rm_size);
format!("{} {}, {}, {}", op, rd, rn, rm)
}
&Inst::VecMisc { op, rd, rn, size } => {

View File

@@ -2375,6 +2375,47 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
});
}
// Lowers the CLIF `widening_pairwise_dot_product_s` instruction. Only an I32X4
// result (i.e. I16X8 inputs) is handled; any other type yields
// `CodegenError::Unsupported`.
Opcode::WideningPairwiseDotProductS => {
let r_y = get_output_reg(ctx, outputs[0]);
let r_a = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let r_b = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
let ty = ty.unwrap();
if ty == I32X4 {
// Scratch vector register for the low-half products.
// NOTE(review): allocated with type I8X16 although it holds 32-bit products;
// presumably only the register class matters here -- confirm.
let tmp = ctx.alloc_tmp(RegClass::V128, I8X16);
// The args have type I16X8.
// "y = i32x4.dot_i16x8_s(a, b)"
// => smull tmp, a, b
// smull2 y, a, b
// addp y, tmp, y
// Step 1: widening signed multiply of the low halves of a and b into tmp.
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Smull,
rd: tmp,
rn: r_a,
rm: r_b,
size: VectorSize::Size16x8,
});
// Step 2: widening signed multiply of the high halves of a and b into y.
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Smull2,
rd: r_y,
rn: r_a,
rm: r_b,
size: VectorSize::Size16x8,
});
// Step 3: pairwise add over the 32-bit products. Per the AArch64 ADDP (vector)
// definition, the sums of adjacent lanes of the first source (tmp) fill the low
// half of the result and those of the second source (y) fill the high half, so
// the operand order tmp-then-y keeps the result lanes in input order.
// y is both read and overwritten by this instruction.
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Addp,
rd: r_y,
rn: tmp.to_reg(),
rm: r_y.to_reg(),
size: VectorSize::Size32x4,
});
} else {
// No lowering exists for other result types; report rather than panic.
return Err(CodegenError::Unsupported(format!(
"Opcode::WideningPairwiseDotProductS: unsupported laneage: {:?}",
ty
)));
}
}
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
let ty = ty.unwrap();
let bits = ty_bits(ty);

View File

@@ -571,6 +571,7 @@ where
Opcode::AtomicLoad => unimplemented!("AtomicLoad"),
Opcode::AtomicStore => unimplemented!("AtomicStore"),
Opcode::Fence => unimplemented!("Fence"),
Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
// TODO: these instructions should be removed once the new backend makes these obsolete
// (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the

View File

@@ -15,7 +15,7 @@ peepmatic-macro = { version = "0.67.0", path = "crates/macro" }
peepmatic-runtime = { version = "0.67.0", path = "crates/runtime", features = ["construct"] }
peepmatic-traits = { version = "0.67.0", path = "crates/traits" }
serde = { version = "1.0.105", features = ["derive"] }
wast = "25.0.0"
wast = "26.0.1"
z3 = { version = "0.7.1", features = ["static-link-z3"] }
[dev-dependencies]

View File

@@ -21,4 +21,4 @@ peepmatic-test-operator = { path = "../test-operator" }
peepmatic-traits = { path = "../traits" }
rand = { version = "0.7.3", features = ["small_rng"] }
serde = "1.0.106"
wast = "25.0.0"
wast = "26.0.1"

View File

@@ -16,7 +16,7 @@ peepmatic-automata = { version = "0.67.0", path = "../automata", features = ["se
peepmatic-traits = { version = "0.67.0", path = "../traits" }
serde = { version = "1.0.105", features = ["derive"] }
thiserror = "1.0.15"
wast = { version = "25.0.0", optional = true }
wast = { version = "26.0.1", optional = true }
[dev-dependencies]
peepmatic-test-operator = { version = "0.67.0", path = "../test-operator" }

View File

@@ -16,4 +16,4 @@ log = "0.4.8"
[dev-dependencies]
peepmatic = { path = "../..", version = "0.67.0" }
peepmatic-test-operator = { version = "0.67.0", path = "../test-operator" }
wast = "25.0.0"
wast = "26.0.1"

View File

@@ -9,4 +9,4 @@ edition = "2018"
[dependencies]
peepmatic-traits = { version = "0.67.0", path = "../traits" }
serde = { version = "1.0.105", features = ["derive"] }
wast = "25.0.0"
wast = "26.0.1"

View File

@@ -12,7 +12,7 @@ keywords = ["webassembly", "wasm"]
edition = "2018"
[dependencies]
wasmparser = { version = "0.63.0", default-features = false }
wasmparser = { version = "0.65.0", default-features = false }
cranelift-codegen = { path = "../codegen", version = "0.67.0", default-features = false }
cranelift-entity = { path = "../entity", version = "0.67.0" }
cranelift-frontend = { path = "../frontend", version = "0.67.0", default-features = false }

View File

@@ -1782,9 +1782,18 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
state.push1(builder.ins().nearest(arg));
}
// wasm `i32x4.dot_i16x8_s`: take the two topmost operands as I16X8 values and push
// the result of the CLIF `widening_pairwise_dot_product_s` instruction.
// NOTE(review): presumably `pop2_with_bitcast` inserts a bitcast only when an
// operand is not already typed I16X8 -- confirm against the helper.
Operator::I32x4DotI16x8S => {
let (a, b) = pop2_with_bitcast(state, I16X8, builder);
state.push1(builder.ins().widening_pairwise_dot_product_s(a, b));
}
Operator::ReturnCall { .. } | Operator::ReturnCallIndirect { .. } => {
return Err(wasm_unsupported!("proposed tail-call operator {:?}", op));
}
Operator::V128Load32Zero { .. } | Operator::V128Load64Zero { .. } => {
return Err(wasm_unsupported!("proposed SIMD operator {:?}", op));
}
};
Ok(())
}

View File

@@ -13,7 +13,7 @@ edition = "2018"
[dependencies]
gimli = "0.22.0"
wasmparser = "0.63.0"
wasmparser = "0.65.0"
object = { version = "0.21.1", default-features = false, features = ["read", "write"] }
wasmtime-environ = { path = "../environ", version = "0.20.0" }
target-lexicon = { version = "0.11.0", default-features = false }

View File

@@ -16,7 +16,7 @@ anyhow = "1.0"
cranelift-codegen = { path = "../../cranelift/codegen", version = "0.67.0", features = ["enable-serde"] }
cranelift-entity = { path = "../../cranelift/entity", version = "0.67.0", features = ["enable-serde"] }
cranelift-wasm = { path = "../../cranelift/wasm", version = "0.67.0", features = ["enable-serde"] }
wasmparser = "0.63.0"
wasmparser = "0.65.0"
indexmap = { version = "1.0.2", features = ["serde-1"] }
thiserror = "1.0.4"
serde = { version = "1.0.94", features = ["derive"] }

View File

@@ -12,8 +12,8 @@ arbitrary = { version = "0.4.1", features = ["derive"] }
env_logger = "0.7.1"
log = "0.4.8"
rayon = "1.2.1"
wasmparser = "0.63.0"
wasmprinter = "0.2.10"
wasmparser = "0.65.0"
wasmprinter = "0.2.12"
wasmtime = { path = "../wasmtime" }
wasmtime-wast = { path = "../wast" }
wasm-smith = "0.1.9"

View File

@@ -28,7 +28,7 @@ rayon = { version = "1.0", optional = true }
region = "2.1.0"
thiserror = "1.0.4"
target-lexicon = { version = "0.11.0", default-features = false }
wasmparser = "0.63.0"
wasmparser = "0.65.0"
more-asserts = "0.2.1"
anyhow = "1.0"
cfg-if = "1.0"

View File

@@ -24,7 +24,7 @@ smallvec = "1.0.0"
staticvec = "0.10"
thiserror = "1.0.9"
typemap = "0.3"
wasmparser = "0.63.0"
wasmparser = "0.65.0"
[dev-dependencies]
lazy_static = "1.2"

View File

@@ -13,6 +13,6 @@ edition = "2018"
[dependencies]
lightbeam = { path = "..", version = "0.20.0" }
wasmparser = "0.63"
wasmparser = "0.65"
cranelift-codegen = { path = "../../../cranelift/codegen", version = "0.67.0" }
wasmtime-environ = { path = "../../environ", version = "0.20.0" }

View File

@@ -16,7 +16,7 @@ wasmtime-jit = { path = "../jit", version = "0.20.0" }
wasmtime-cache = { path = "../cache", version = "0.20.0", optional = true }
wasmtime-profiling = { path = "../profiling", version = "0.20.0" }
target-lexicon = { version = "0.11.0", default-features = false }
wasmparser = "0.63.0"
wasmparser = "0.65.0"
anyhow = "1.0.19"
region = "2.2.0"
libc = "0.2"

View File

@@ -13,7 +13,7 @@ edition = "2018"
[dependencies]
anyhow = "1.0.19"
wasmtime = { path = "../wasmtime", version = "0.20.0", default-features = false }
wast = "25.0.0"
wast = "26.0.1"
[badges]
maintenance = { status = "actively-developed" }