From 9dc92eb8b3ac3e033b6d08377dbe482426d37261 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 12 Jul 2017 16:28:33 -0700 Subject: [PATCH] Add Intel BMI1 ctz and clz encodings. --- cranelift/filetests/isa/intel/binary32.cton | 12 ++++++- cranelift/filetests/isa/intel/binary64.cton | 36 ++++++++++++++++++--- cranelift/filetests/wasm/i32-arith.cton | 17 +++++++--- lib/cretonne/meta/isa/intel/encodings.py | 14 ++++++++ 4 files changed, 70 insertions(+), 9 deletions(-) diff --git a/cranelift/filetests/isa/intel/binary32.cton b/cranelift/filetests/isa/intel/binary32.cton index 33df4794b3..bd3ff06289 100644 --- a/cranelift/filetests/isa/intel/binary32.cton +++ b/cranelift/filetests/isa/intel/binary32.cton @@ -1,6 +1,6 @@ ; binary emission of 32-bit code. test binemit -isa intel has_sse42 has_popcnt +isa intel haswell ; The binary encodings can be verified with the command: ; @@ -224,6 +224,16 @@ ebb0: ; asm: popcntl %ecx, %esi [-,%rsi] v201 = popcnt v1 ; bin: f3 0f b8 f1 + ; asm: lzcntl %esi, %ecx + [-,%rcx] v202 = clz v2 ; bin: f3 0f bd ce + ; asm: lzcntl %ecx, %esi + [-,%rsi] v203 = clz v1 ; bin: f3 0f bd f1 + + ; asm: tzcntl %esi, %ecx + [-,%rcx] v204 = ctz v2 ; bin: f3 0f bc ce + ; asm: tzcntl %ecx, %esi + [-,%rsi] v205 = ctz v1 ; bin: f3 0f bc f1 + ; asm: call foo call fn0() ; bin: e8 PCRel4(fn0) 00000000 diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton index 908b42ac0e..ecb4b8a40d 100644 --- a/cranelift/filetests/isa/intel/binary64.cton +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -1,7 +1,7 @@ ; binary emission of 64-bit code. test binemit set is_64bit -isa intel has_sse42 has_popcnt +isa intel haswell ; The binary encodings can be verified with the command: ; @@ -154,6 +154,20 @@ ebb0: ; asm: popcntq %rcx, %r10 [-,%r10] v202 = popcnt v1 ; bin: f3 4c 0f b8 d1 + ; asm: lzcntq %rsi, %rcx + [-,%rcx] v203 = clz v2 ; bin: f3 48 0f bd ce + ; asm: lzcntq %r10, %rsi + [-,%rsi] v204 = clz v3 ; bin: f3 49 0f bd f2 + ; asm: lzcntq %rcx, %r10 + [-,%r10] v205 = clz v1 ; bin: f3 4c 0f bd d1 + + ; asm: tzcntq %rsi, %rcx + [-,%rcx] v206 = ctz v2 ; bin: f3 48 0f bc ce + ; asm: tzcntq %r10, %rsi + [-,%rsi] v207 = ctz v3 ; bin: f3 49 0f bc f2 + ; asm: tzcntq %rcx, %r10 + [-,%r10] v208 = ctz v1 ; bin: f3 4c 0f bc d1 + return ; bin: c3 } @@ -302,11 +316,25 @@ ebb0: ; Bit-counting instructions. ; asm: popcntl %esi, %ecx - [-,%rcx] v200 = popcnt v2 ; bin: f3 40 0f b8 ce + [-,%rcx] v200 = popcnt v2 ; bin: f3 40 0f b8 ce ; asm: popcntl %r10d, %esi - [-,%rsi] v201 = popcnt v3 ; bin: f3 41 0f b8 f2 + [-,%rsi] v201 = popcnt v3 ; bin: f3 41 0f b8 f2 ; asm: popcntl %ecx, %r10d - [-,%r10] v202 = popcnt v1 ; bin: f3 44 0f b8 d1 + [-,%r10] v202 = popcnt v1 ; bin: f3 44 0f b8 d1 + + ; asm: lzcntl %esi, %ecx + [-,%rcx] v203 = clz v2 ; bin: f3 40 0f bd ce + ; asm: lzcntl %r10d, %esi + [-,%rsi] v204 = clz v3 ; bin: f3 41 0f bd f2 + ; asm: lzcntl %ecx, %r10d + [-,%r10] v205 = clz v1 ; bin: f3 44 0f bd d1 + + ; asm: tzcntl %esi, %ecx + [-,%rcx] v206 = ctz v2 ; bin: f3 40 0f bc ce + ; asm: tzcntl %r10d, %esi + [-,%rsi] v207 = ctz v3 ; bin: f3 41 0f bc f2 + ; asm: tzcntl %ecx, %r10d + [-,%r10] v208 = ctz v1 ; bin: f3 44 0f bc d1 return ; bin: c3 } diff --git a/cranelift/filetests/wasm/i32-arith.cton b/cranelift/filetests/wasm/i32-arith.cton index 2992dcf5b8..bddec1b52b 100644 --- a/cranelift/filetests/wasm/i32-arith.cton +++ b/cranelift/filetests/wasm/i32-arith.cton @@ -2,10 +2,10 @@ test compile set is_64bit=0 -isa intel has_sse42 has_popcnt +isa intel haswell set is_64bit=1 -isa intel has_sse42 has_popcnt +isa intel haswell ; Constants. @@ -17,8 +17,17 @@ ebb0: ; Unary operations. -; function %i32_clz(i32) -> i32 -; function %i32_ctz(i32) -> i32 +function %i32_clz(i32) -> i32 { +ebb0(v0: i32): + v1 = clz v0 + return v1 +} + +function %i32_ctz(i32) -> i32 { +ebb0(v0: i32): + v1 = ctz v0 + return v1 +} function %i32_popcnt(i32) -> i32 { ebb0(v0: i32): diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index dc54819a8b..c2c361ea86 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -88,6 +88,20 @@ I64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1), I64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt) I64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt) +# Count leading zero bits. +I32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt) +I64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1), + isap=cfg.use_lzcnt) +I64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt) +I64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt) + +# Count trailing zero bits. +I32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) +I64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1), + isap=cfg.use_bmi1) +I64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) +I64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) + # Loads and stores. I32.enc(base.store.i32.i32, *r.st(0x89)) I32.enc(base.store.i32.i32, *r.stDisp8(0x89))