diff --git a/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton index 62a793de60..b87815aa12 100644 --- a/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton +++ b/cranelift/filetests/isa/intel/baseline_clz_ctz_popcount.cton @@ -47,28 +47,23 @@ ebb1(v20: i32): function %i64_popcount(i64) -> i64 { ebb0(v30: i64): v31 = popcnt v30; - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm ; check: iconst.i64 ; check: band ; check: isub - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm ; check: band ; check: isub - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm ; check: band ; check: isub - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm ; check: iadd ; check: iconst.i64 ; check: band ; check: iconst.i64 ; check: imul - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm return v31; } @@ -78,27 +73,22 @@ ebb0(v30: i64): function %i32_popcount(i32) -> i32 { ebb0(v40: i32): v41 = popcnt v40; - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm ; check: iconst.i32 ; check: band ; check: isub - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm ; check: band ; check: isub - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm ; check: band ; check: isub - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm ; check: iadd ; check: iconst.i32 ; check: band ; check: iconst.i32 ; check: imul - ; check: iconst.i32 - ; check: ushr + ; check: ushr_imm return v41; } diff --git a/cranelift/filetests/isa/intel/binary32.cton b/cranelift/filetests/isa/intel/binary32.cton index df98a86515..56301607cc 100644 --- a/cranelift/filetests/isa/intel/binary32.cton +++ b/cranelift/filetests/isa/intel/binary32.cton @@ -403,6 +403,13 @@ ebb0: ; asm: addl $-2147483648, %esp adjust_sp_imm -2147483648 ; bin: 81 c4 80000000 + ; Shift immediates + ; asm: shll $2, %esi + [-,%rsi] v513 = ishl_imm v2, 2 ; bin: c1 e6 02 + ; asm: sarl $5, %esi + [-,%rsi] v514 = sshr_imm v2, 5 ; 
bin: c1 fe 05 + ; asm: shrl $8, %esi + [-,%rsi] v515 = ushr_imm v2, 8 ; bin: c1 ee 08 ; asm: testl %ecx, %ecx ; asm: je ebb1 diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton index 9eca950ee6..5741f18041 100644 --- a/cranelift/filetests/isa/intel/binary64.cton +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -529,6 +529,21 @@ ebb0: ; asm: addq $-2147483648, %rsp adjust_sp_imm -2147483648 ; bin: 48 81 c4 80000000 + ; Shift immediates + ; asm: shlq $12, %rsi + [-,%rsi] v515 = ishl_imm v2, 12 ; bin: 48 c1 e6 0c + ; asm: shlq $13, %r8 + [-,%r8] v516 = ishl_imm v4, 13 ; bin: 49 c1 e0 0d + ; asm: sarq $32, %rsi + [-,%rsi] v517 = sshr_imm v2, 32 ; bin: 48 c1 fe 20 + ; asm: sarq $33, %r8 + [-,%r8] v518 = sshr_imm v4, 33 ; bin: 49 c1 f8 21 + ; asm: shrq $62, %rsi + [-,%rsi] v519 = ushr_imm v2, 62 ; bin: 48 c1 ee 3e + ; asm: shrq $63, %r8 + [-,%r8] v520 = ushr_imm v4, 63 ; bin: 49 c1 e8 3f + + ; asm: testq %rcx, %rcx ; asm: je ebb1 brz v1, ebb1 ; bin: 48 85 c9 74 1b @@ -653,6 +668,7 @@ ebb1: ; asm: cmpq $10000, %r10 [-,%eflags] v525 = ifcmp_imm v2, 10000 ; bin: 49 81 fa 00002710 + return } @@ -1053,6 +1069,19 @@ ebb0: ; asm: cmpl $10000, %r10d [-,%eflags] v525 = ifcmp_imm v3, 10000 ; bin: 41 81 fa 00002710 + ; asm: shll $2, %esi + [-,%rsi] v526 = ishl_imm v2, 2 ; bin: c1 e6 02 + ; asm: shll $12, %r10d + [-,%r10] v527 = ishl_imm v3, 12 ; bin: 41 c1 e2 0c + ; asm: sarl $5, %esi + [-,%rsi] v529 = sshr_imm v2, 5 ; bin: c1 fe 05 + ; asm: sarl $32, %r10d + [-,%r10] v530 = sshr_imm v3, 32 ; bin: 41 c1 fa 20 + ; asm: shrl $8, %esi + [-,%rsi] v532 = ushr_imm v2, 8 ; bin: c1 ee 08 + ; asm: shrl $31, %r10d + [-,%r10] v533 = ushr_imm v3, 31 ; bin: 41 c1 ea 1f + ; asm: testl %ecx, %ecx ; asm: je ebb1x brz v1, ebb1 ; bin: 85 c9 74 18 @@ -1082,6 +1111,7 @@ ebb1: ; asm: ebb2x: ebb2: jump ebb1 ; bin: eb fd + } ; Tests for i32/i8 conversion instructions. 
diff --git a/cranelift/filetests/isa/intel/legalize-div-traps.cton b/cranelift/filetests/isa/intel/legalize-div-traps.cton index fa070fba1a..3869e66325 100644 --- a/cranelift/filetests/isa/intel/legalize-div-traps.cton +++ b/cranelift/filetests/isa/intel/legalize-div-traps.cton @@ -40,7 +40,7 @@ ebb0(v0: i64, v1: i64): ; nextln: brif eq $fm1, $(m1=$EBB) ; nextln: $(fz=$V) = ifcmp_imm v1, 0 ; nextln: trapif eq $fz, int_divz - ; check: $(hi=$V) = sshr + ; check: $(hi=$V) = sshr_imm ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 ; nextln: jump $(done=$EBB)($q) ; check: $m1: @@ -60,7 +60,7 @@ ebb0(v0: i64, v1: i64): v2 = srem v0, v1 ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 ; nextln: brif eq $fm1, $(m1=$EBB) - ; check: $(hi=$V) = sshr + ; check: $(hi=$V) = sshr_imm ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 ; nextln: jump $(done=$EBB)($r) ; check: $m1: diff --git a/cranelift/filetests/isa/intel/legalize-div.cton b/cranelift/filetests/isa/intel/legalize-div.cton index ec3f8ec5d3..d6179b2611 100644 --- a/cranelift/filetests/isa/intel/legalize-div.cton +++ b/cranelift/filetests/isa/intel/legalize-div.cton @@ -32,7 +32,7 @@ function %sdiv(i64, i64) -> i64 { ebb0(v0: i64, v1: i64): ; check: ebb0( v2 = sdiv v0, v1 - ; check: $(hi=$V) = sshr + ; check: $(hi=$V) = sshr_imm ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 return v2 ; nextln: return $d @@ -46,7 +46,7 @@ ebb0(v0: i64, v1: i64): v2 = srem v0, v1 ; nextln: $(fm1=$V) = ifcmp_imm v1, -1 ; nextln: brif eq $fm1, $(m1=$EBB) - ; check: $(hi=$V) = sshr + ; check: $(hi=$V) = sshr_imm ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1 ; nextln: jump $(done=$EBB)($r) ; check: $m1: diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 489e61be9f..afb544cb9a 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -164,11 +164,19 @@ for inst, rrr in [ (base.ishl, 4), (base.ushr, 5), (base.sshr, 7)]: + # Cannot 
use enc_i32_i64 for this pattern because instructions require + # .any suffix. X86_32.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr)) X86_64.enc(inst.i64.any, *r.rc.rex(0xd3, rrr=rrr, w=1)) X86_64.enc(inst.i32.any, *r.rc.rex(0xd3, rrr=rrr)) X86_64.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr)) +for inst, rrr in [ + (base.ishl_imm, 4), + (base.ushr_imm, 5), + (base.sshr_imm, 7)]: + enc_i32_i64(inst, r.rib, 0xc1, rrr=rrr) + # Population count. X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt) X86_64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1),