diff --git a/cranelift/filetests/isa/intel/binary32.cton b/cranelift/filetests/isa/intel/binary32.cton index 4cc14b6905..77e53ceeec 100644 --- a/cranelift/filetests/isa/intel/binary32.cton +++ b/cranelift/filetests/isa/intel/binary32.cton @@ -217,6 +217,13 @@ ebb0: ; asm: movsbl -50000(%esi), %edx [-,%rdx] v129 = sload8.i32 v2-50000 ; bin: 0f be 96 ffff3cb0 + ; Bit-counting instructions. + + ; asm: popcntl %esi, %ecx + [-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce + ; asm: popcntl %ecx, %esi + [-,%rsi] v201 = popcnt v1 ; bin: f3 0f b8 f1 + ; asm: call foo call fn0() ; bin: e8 PCRel4(fn0) 00000000 diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton index e9ee5d3941..da350d1e45 100644 --- a/cranelift/filetests/isa/intel/binary64.cton +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -145,6 +145,15 @@ ebb0: ; asm: movq %rcx, %r10 [-,%r10] v112 = copy v1 ; bin: 49 89 ca + ; Bit-counting instructions. + + ; asm: popcntq %rsi, %rcx + [-,%rcx] v200 = popcnt v2 ; bin: f3 48 0f b8 ce + ; asm: popcntq %r10, %rsi + [-,%rsi] v201 = popcnt v3 ; bin: f3 49 0f b8 f2 + ; asm: popcntq %rcx, %r10 + [-,%r10] v202 = popcnt v1 ; bin: f3 4c 0f b8 d1 + return ; bin: c3 } @@ -290,5 +299,14 @@ ebb0: ; asm: movl %ecx, %r10d [-,%r10] v112 = copy v1 ; bin: 41 89 ca + ; Bit-counting instructions. 
+ + ; asm: popcntl %esi, %ecx + [-,%rcx] v200 = popcnt v2 ; bin: f3 40 0f b8 ce + ; asm: popcntl %r10d, %esi + [-,%rsi] v201 = popcnt v3 ; bin: f3 41 0f b8 f2 + ; asm: popcntl %ecx, %r10d + [-,%r10] v202 = popcnt v1 ; bin: f3 44 0f b8 d1 + return ; bin: c3 } diff --git a/cranelift/filetests/wasm/i32-arith.cton b/cranelift/filetests/wasm/i32-arith.cton index f2fafffee3..fc730cbe23 100644 --- a/cranelift/filetests/wasm/i32-arith.cton +++ b/cranelift/filetests/wasm/i32-arith.cton @@ -19,7 +19,12 @@ ebb0: ; function %i32_clz(i32) -> i32 ; function %i32_ctz(i32) -> i32 -; function %i32_popcnt(i32) -> i32 + +function %i32_popcnt(i32) -> i32 { +ebb0(v0: i32): + v1 = popcnt v0 + return v1 +} ; Binary operations. diff --git a/lib/cretonne/meta/base/instructions.py b/lib/cretonne/meta/base/instructions.py index e1b38086e4..2aa9027a45 100644 --- a/lib/cretonne/meta/base/instructions.py +++ b/lib/cretonne/meta/base/instructions.py @@ -8,7 +8,7 @@ from __future__ import absolute_import from cdsl.operands import Operand, VARIABLE_ARGS from cdsl.typevar import TypeVar from cdsl.instructions import Instruction, InstructionGroup -from base.types import i8, f32, f64, b1 +from base.types import f32, f64, b1 from base.immediates import imm64, uimm8, ieee32, ieee64, offset32, uoffset32 from base.immediates import intcc, floatcc, memflags, regunit from base import entities @@ -1050,7 +1050,7 @@ sshr_imm = Instruction( # x = Operand('x', iB) -a = Operand('a', i8) +a = Operand('a', iB) clz = Instruction( 'clz', r""" diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index df6631fe4c..d8dfe53140 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -22,10 +22,10 @@ for inst, opc in [ # default. Otherwise reg-alloc would never use r8 and up. 
I64.enc(inst.i32, *r.rr(opc)) -I32.enc(base.copy.i32, *r.ur(0x89)) -I64.enc(base.copy.i64, *r.ur.rex(0x89, w=1)) -I64.enc(base.copy.i32, *r.ur.rex(0x89)) -I64.enc(base.copy.i32, *r.ur(0x89)) +I32.enc(base.copy.i32, *r.umr(0x89)) +I64.enc(base.copy.i64, *r.umr.rex(0x89, w=1)) +I64.enc(base.copy.i32, *r.umr.rex(0x89)) +I64.enc(base.copy.i32, *r.umr(0x89)) I32.enc(base.regmove.i32, *r.rmov(0x89)) I64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1)) @@ -80,6 +80,12 @@ for inst, rrr in [ I64.enc(inst.i32.i32, *r.rc.rex(0xd3, rrr=rrr)) I64.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr)) +# Population count. +I32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8)) +I64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1)) +I64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8)) +I64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8)) + # Loads and stores. I32.enc(base.store.i32.i32, *r.st(0x89)) I32.enc(base.store.i32.i32, *r.stDisp8(0x89)) diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index c6c7b3f2af..b7e6aa5575 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -198,14 +198,23 @@ rr = TailRecipe( ''') # XX /r, but for a unary operator with separate input/output register, like -# copies. -ur = TailRecipe( - 'ur', Unary, size=1, ins=GPR, outs=GPR, +# copies. MR form. +umr = TailRecipe( + 'umr', Unary, size=1, ins=GPR, outs=GPR, emit=''' PUT_OP(bits, rex2(out_reg0, in_reg0), sink); modrm_rr(out_reg0, in_reg0, sink); ''') +# XX /r, but for a unary operator with separate input/output register. +# RM form. +urm = TailRecipe( + 'urm', Unary, size=1, ins=GPR, outs=GPR, + emit=''' + PUT_OP(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + ''') + # XX /r, for regmove instructions. 
rmov = TailRecipe( 'ur', RegMove, size=1, ins=GPR, outs=(), diff --git a/lib/cretonne/src/isa/intel/binemit.rs b/lib/cretonne/src/isa/intel/binemit.rs index 9db0ee1234..56f0c2f7f0 100644 --- a/lib/cretonne/src/isa/intel/binemit.rs +++ b/lib/cretonne/src/isa/intel/binemit.rs @@ -57,7 +57,7 @@ fn rex_prefix(bits: u16, rex: u8, sink: &mut CS) { // Emit a single-byte opcode with no REX prefix. fn put_op1(bits: u16, rex: u8, sink: &mut CS) { debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less encoding"); + debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding"); sink.put1(bits as u8); } @@ -71,17 +71,37 @@ fn put_rexop1(bits: u16, rex: u8, sink: &mut CS) { // Emit two-byte opcode: 0F XX fn put_op2(bits: u16, rex: u8, sink: &mut CS) { debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*"); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less encoding"); + debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding"); sink.put1(0x0f); sink.put1(bits as u8); } // Emit single-byte opcode with mandatory prefix. fn put_mp1(bits: u16, rex: u8, sink: &mut CS) { - debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*"); + debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*"); let pp = (bits >> 8) & 3; sink.put1(PREFIX[(pp - 1) as usize]); - debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less encoding"); + debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding"); + sink.put1(bits as u8); +} + +// Emit two-byte opcode (0F XX) with mandatory prefix. 
+fn put_mp2(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
+    let pp = (bits >> 8) & 3; // Bits 8-9 of `bits` pick the mandatory prefix.
+    sink.put1(PREFIX[(pp - 1) as usize]); // `pp` must be non-zero: PREFIX is indexed by `pp - 1`.
+    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
+    sink.put1(0x0f); // First byte of the two-byte opcode (the 0F in 0F XX).
+    sink.put1(bits as u8); // Low byte of `bits` is the second opcode byte (the XX in 0F XX).
+}
+
+// Emit two-byte opcode (0F XX) with mandatory prefix and REX: prefix byte, REX, 0F, opcode.
+fn put_rexmp2(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for Mp2*");
+    let pp = (bits >> 8) & 3; // Same prefix selector as the REX-less variant (bits 8-9).
+    sink.put1(PREFIX[(pp - 1) as usize]); // Mandatory prefix goes out before the REX prefix.
+    rex_prefix(bits, rex, sink); // NOTE(review): bit 15 (unchecked above) is presumably REX.W.
+    sink.put1(0x0f);
     sink.put1(bits as u8);
 }