Merge pull request #2490 from cfallin/fix-popcnt-load-width
x64 lowering fix: i32.popcnt must not merge its input load into the instruction, because that would turn the 32-bit load into a 64-bit one.
This commit is contained in:
@@ -1530,7 +1530,10 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let src = if let Some(ext_spec) = ext_spec {
|
||||
RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec))
|
||||
} else {
|
||||
input_to_reg_mem(ctx, inputs[0])
|
||||
// N.B.: explicitly put input in a reg here because the width of the instruction
|
||||
// into which this RM op goes may not match the width of the input type (in fact,
|
||||
// it won't for i32.popcnt), and we don't want a larger than necessary load.
|
||||
RegMem::reg(put_input_in_reg(ctx, inputs[0]))
|
||||
};
|
||||
let dst = get_output_reg(ctx, outputs[0]);
|
||||
|
||||
|
||||
113
cranelift/filetests/filetests/isa/x64/popcnt.clif
Normal file
113
cranelift/filetests/filetests/isa/x64/popcnt.clif
Normal file
@@ -0,0 +1,113 @@
|
||||
test compile
|
||||
target x86_64
|
||||
feature "experimental_x64"
|
||||
|
||||
; TODO: also test with the popcnt feature available, once the new backend supports it.
|
||||
|
||||
; popcnt of an i64 value passed in a register. The expected code below counts
; bits with shifts, masks, subtracts and a multiply, using 64-bit instructions
; throughout. No popcnt machine instruction appears in the expected output.
function %popcnt64(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = popcnt v0
|
||||
; check:  movq    %rdi, %rsi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: movabsq $$8608480567731124087, %rax
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrq $$4, %rsi
|
||||
; nextln: addq %rdi, %rsi
|
||||
; nextln: movabsq $$1085102592571150095, %rdi
|
||||
; nextln: andq %rdi, %rsi
|
||||
; nextln: movabsq $$72340172838076673, %rdi
|
||||
; nextln: imulq %rdi, %rsi
|
||||
; nextln: shrq $$56, %rsi
|
||||
; nextln: movq %rsi, %rax
|
||||
return v1
|
||||
}
|
||||
|
||||
; popcnt of an i64 value loaded from the address in the argument. The load is
; expected as its own 64-bit movq, followed by the 64-bit bit-counting
; sequence operating on registers only.
function %popcnt64load(i64) -> i64 {
|
||||
block0(v0: i64):
|
||||
v1 = load.i64 v0
|
||||
v2 = popcnt v1
|
||||
return v2
|
||||
; check: movq 0(%rdi), %rdi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: movabsq $$8608480567731124087, %rax
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: shrq $$1, %rsi
|
||||
; nextln: andq %rax, %rsi
|
||||
; nextln: subq %rsi, %rdi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrq $$4, %rsi
|
||||
; nextln: addq %rdi, %rsi
|
||||
; nextln: movabsq $$1085102592571150095, %rdi
|
||||
; nextln: andq %rdi, %rsi
|
||||
; nextln: movabsq $$72340172838076673, %rdi
|
||||
; nextln: imulq %rdi, %rsi
|
||||
; nextln: shrq $$56, %rsi
|
||||
; nextln: movq %rsi, %rax
|
||||
}
|
||||
|
||||
; popcnt of an i32 value passed in a register. The bit-counting arithmetic is
; expected to use 32-bit (l-suffixed) instructions, while the register copies
; are still emitted as 64-bit movq. The expectations also pin the epilogue
; through the final ret.
function %popcnt32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = popcnt v0
|
||||
return v1
|
||||
; check: movq %rdi, %rsi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrl $$4, %esi
|
||||
; nextln: addl %edi, %esi
|
||||
; nextln: andl $$252645135, %esi
|
||||
; nextln: imull $$16843009, %esi
|
||||
; nextln: shrl $$24, %esi
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
}
|
||||
|
||||
; popcnt of an i32 value loaded from the address in the argument. This is the
; case the accompanying lowering change targets. The load must appear as a
; separate 32-bit movl and must not be folded into a later instruction, where
; it could read more bytes than the i32 occupies.
function %popcnt32load(i64) -> i32 {
|
||||
block0(v0: i64):
|
||||
v1 = load.i32 v0
|
||||
v2 = popcnt v1
|
||||
return v2
|
||||
; check: movl 0(%rdi), %edi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: shrl $$1, %esi
|
||||
; nextln: andl $$2004318071, %esi
|
||||
; nextln: subl %esi, %edi
|
||||
; nextln: movq %rdi, %rsi
|
||||
; nextln: shrl $$4, %esi
|
||||
; nextln: addl %edi, %esi
|
||||
; nextln: andl $$252645135, %esi
|
||||
; nextln: imull $$16843009, %esi
|
||||
; nextln: shrl $$24, %esi
|
||||
; nextln: movq %rsi, %rax
|
||||
; nextln: movq %rbp, %rsp
|
||||
; nextln: popq %rbp
|
||||
; nextln: ret
|
||||
}
|
||||
Reference in New Issue
Block a user