cranelift: Mask high bits on bmask for types smaller than a register (#5118)
* aarch64: Fix incorrect masking for small types on bmask
`bmask` was accidentally relying on the uppermost bits of the register
for small types.
This was found by fuzzgen: it generated a shift left followed by
a bmask. The shift left moved bits out of the range of the input
type (i8), but those bits were not automatically cleared because they
remained inside the 32 bits of the register.
That caused issues when the bmask tried to compare the whole register
instead of just the bottom bits. The solution here is to mask the upper
bits for small types.
* aarch64: Emit 32bit cmp on bmask
This fixes an issue where bmask always used a 64bit cmp and therefore
accidentally compared the upper bits of the register.
* riscv: Mask high bits in bmask
* riscv: Add compile tests for br{z,nz}
* riscv: Use shifts to mask 32bit values
This produces less code than the AND since that version needs to
load an immediate constant from memory.
* cranelift: Update test input to hexadecimal values
This makes it a bit more clear what is being tested.
* riscv: Use addiw for masking 32 bit values
Co-authored-by: Trevor Elliott <telliott@fastly.com>
* aarch64: Update bmask rule priority
Co-authored-by: Trevor Elliott <telliott@fastly.com>
This commit is contained in:
@@ -81,7 +81,7 @@ block0(v0: i32):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; subs xzr, x0, #0
|
||||
; subs wzr, w0, #0
|
||||
; csetm x1, ne
|
||||
; mov x0, x1
|
||||
; ret
|
||||
@@ -93,7 +93,8 @@ block0(v0: i16):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; subs xzr, x0, #0
|
||||
; and w4, w0, #65535
|
||||
; subs wzr, w4, #0
|
||||
; csetm x1, ne
|
||||
; mov x0, x1
|
||||
; ret
|
||||
@@ -105,7 +106,8 @@ block0(v0: i8):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; subs xzr, x0, #0
|
||||
; and w4, w0, #255
|
||||
; subs wzr, w4, #0
|
||||
; csetm x1, ne
|
||||
; mov x0, x1
|
||||
; ret
|
||||
|
||||
@@ -399,3 +399,167 @@ block1:
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
|
||||
|
||||
function %i8_brz(i8){
|
||||
block0(v0: i8):
|
||||
brz v0, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
nop
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; andi t2,a0,255
|
||||
; beq t2,zero,taken(label1),not_taken(label2)
|
||||
; block1:
|
||||
; j label3
|
||||
; block2:
|
||||
; j label3
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
function %i8_brnz(i8){
|
||||
block0(v0: i8):
|
||||
brnz v0, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
nop
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; andi t2,a0,255
|
||||
; bne t2,zero,taken(label1),not_taken(label2)
|
||||
; block1:
|
||||
; j label3
|
||||
; block2:
|
||||
; j label3
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
function %i16_brz(i16){
|
||||
block0(v0: i16):
|
||||
brz v0, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
nop
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; lui t2,16
|
||||
; addi t2,t2,4095
|
||||
; and a2,a0,t2
|
||||
; beq a2,zero,taken(label1),not_taken(label2)
|
||||
; block1:
|
||||
; j label3
|
||||
; block2:
|
||||
; j label3
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
function %i16_brnz(i16){
|
||||
block0(v0: i16):
|
||||
brnz v0, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
nop
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; lui t2,16
|
||||
; addi t2,t2,4095
|
||||
; and a2,a0,t2
|
||||
; bne a2,zero,taken(label1),not_taken(label2)
|
||||
; block1:
|
||||
; j label3
|
||||
; block2:
|
||||
; j label3
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
function %i32_brz(i32){
|
||||
block0(v0: i32):
|
||||
brz v0, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
nop
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; addiw t2,a0,0
|
||||
; beq t2,zero,taken(label1),not_taken(label2)
|
||||
; block1:
|
||||
; j label3
|
||||
; block2:
|
||||
; j label3
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
function %i32_brnz(i32){
|
||||
block0(v0: i32):
|
||||
brnz v0, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
nop
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; addiw t2,a0,0
|
||||
; bne t2,zero,taken(label1),not_taken(label2)
|
||||
; block1:
|
||||
; j label3
|
||||
; block2:
|
||||
; j label3
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
function %i64_brz(i64){
|
||||
block0(v0: i64):
|
||||
brz v0, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
nop
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; beq a0,zero,taken(label1),not_taken(label2)
|
||||
; block1:
|
||||
; j label3
|
||||
; block2:
|
||||
; j label3
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
function %i64_brnz(i64){
|
||||
block0(v0: i64):
|
||||
brnz v0, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
nop
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; bne a0,zero,taken(label1),not_taken(label2)
|
||||
; block1:
|
||||
; j label3
|
||||
; block2:
|
||||
; j label3
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
|
||||
@@ -82,8 +82,9 @@ block0(v0: i32):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; li a1,-1
|
||||
; select_reg a1,zero,a1##condition=(zero eq a0)
|
||||
; addiw a1,a0,0
|
||||
; li a3,-1
|
||||
; select_reg a1,zero,a3##condition=(zero eq a1)
|
||||
; mv a0,a1
|
||||
; ret
|
||||
|
||||
@@ -94,8 +95,11 @@ block0(v0: i16):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; li a1,-1
|
||||
; select_reg a1,zero,a1##condition=(zero eq a0)
|
||||
; lui a1,16
|
||||
; addi a1,a1,4095
|
||||
; and a4,a0,a1
|
||||
; li a6,-1
|
||||
; select_reg a1,zero,a6##condition=(zero eq a4)
|
||||
; mv a0,a1
|
||||
; ret
|
||||
|
||||
@@ -106,8 +110,9 @@ block0(v0: i8):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; li a1,-1
|
||||
; select_reg a1,zero,a1##condition=(zero eq a0)
|
||||
; andi a1,a0,255
|
||||
; li a3,-1
|
||||
; select_reg a1,zero,a3##condition=(zero eq a1)
|
||||
; mv a0,a1
|
||||
; ret
|
||||
|
||||
|
||||
@@ -131,3 +131,27 @@ block0(v0: i8):
|
||||
}
|
||||
; run: %bmask_i8_i8(1) == -1
|
||||
; run: %bmask_i8_i8(0) == 0
|
||||
|
||||
|
||||
; This is a regression test for AArch64, where the high bits weren't
|
||||
; correctly being masked off for smaller types
|
||||
function %bmask_masks_small_types() -> i8 {
|
||||
block0:
|
||||
v0 = iconst.i8 120
|
||||
v1 = iconst.i8 7
|
||||
v2 = ishl.i8 v0, v1
|
||||
v3 = bmask.i8 v2
|
||||
return v3
|
||||
}
|
||||
; run: %bmask_masks_small_types() == 0
|
||||
|
||||
; Similar to the above, this issue happened due to us always using a 64 bit
|
||||
; comparison, even on a 32 bit type. This is triggered by ireduce since it
|
||||
; doesn't actually produce any instructions, but is just a "type cast".
|
||||
function %bmask_uses_32bit_cmp(i64) -> i8 {
|
||||
block0(v0: i64):
|
||||
v1 = ireduce.i32 v0
|
||||
v2 = bmask.i8 v1
|
||||
return v2
|
||||
}
|
||||
; run: %bmask_uses_32bit_cmp(0x2520B6E9_00000000) == 0
|
||||
|
||||
Reference in New Issue
Block a user