wasmtime/cranelift/filetests/filetests/isa/riscv64/bitops.clif
Alex Crichton de0e0bea3f Legalize b{and,or,xor}_not into component instructions (#5709)
* Remove trailing whitespace in `lower.isle` files

* Legalize the `band_not` instruction into simpler form

This commit legalizes the `band_not` instruction into a `band`-of-`bnot`,
i.e. two instructions. This is intended to assist with egraph-based
optimizations, where the `band_not` instruction then doesn't have to be
specifically included in other bit-operation patterns.

Lowerings of the `band_not` instruction have been moved to a
specialization of the `band` instruction.
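
Concretely, the rewrite has roughly this effect in CLIF (an illustrative
sketch of the legalization's result, not the literal rewrite code;
function names are made up):

function %band_not_before(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = band_not v0, v1
    return v2
}

; ...is legalized into the equivalent...

function %band_not_after(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = bnot v1
    v3 = band v0, v2
    return v3
}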

* Legalize `bor_not` into components

Same as prior commit, but for the `bor_not` instruction.

* Legalize `bxor_not` into `bxor`-of-`bnot`

Same as the prior commits. Judging by the test updates, I think this
also ended up fixing a bug in the s390x backend where `bxor_not x y` was
accidentally translated as `bnot (bxor x y)`.

* Simplify not-fused operands for riscv64

It looks like some delegated-to rules had special cases of the form "if
this feature is enabled, use the fused instruction", so move the feature
test up into the lowering rule itself, to help other rules trigger when
the feature isn't enabled. This should make the riscv64 backend more
consistent with how the other backends are implemented. (This file
targets plain `riscv64` with no extra extensions, so the expectations
below show the unfused `not`-plus-`and`/`or`/`xor` sequences.)

* Remove B{and,or,xor}Not from the egraph cost metrics

These should never reach the egraph now that they're legalized away.

* Add an egraph optimization for `x^-1 => ~x`

This adds a simplification rule to translate xor-against-minus-1 into a
`bnot` instruction. This helps trigger various other optimizations in
the egraph implementation, as well as various backend lowering rules.
It's chiefly useful because wasm has no `bnot` equivalent, so the
operation is encoded as `x^-1`.
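
For example, in CLIF (a minimal sketch; the constant must be all-ones
for the type, shown here for i64):

function %xor_minus_one(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 -1
    v2 = bxor v0, v1
    return v2
}

; the egraph simplifies `v2` into `v2 = bnot v0`, which backends can
; then lower to a single `not` instruction or fuse into other patterns.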

* Add a wasm test for end-to-end bitwise lowerings

Test end-to-end that various optimizations are applied to input wasm
modules.

* Specifically don't self-update rustup on CI

I forget why this was here originally, but this is failing on Windows
CI. In general there's no need to update rustup, so leave it as-is.

* Cleanup some aarch64 lowering rules

Previously a 32/64 split was necessary because the `ALUOp` differed,
but that has since been refactored away, so there's no longer any need
for duplicate rules.

* Narrow a x64 lowering rule

This previously made more sense when it was `band_not` and rarely used;
be more specific in this rule's type filter so that it only applies to
SIMD types with lanes.

* Simplify xor-against-minus-1 rule

No need to have the commutative version, since constants are already
shuffled to the right for egraphs.

* Optimize band-of-bnot when bnot is on the left

Use some more rules in the egraph's algebraic optimizations to
canonicalize `band`/`bor`/`bxor` with a `bnot` operand so that the
`bnot` ends up on the right. That way the lowerings in the backends only
have to list each rule once, with the operand on the right, to optimize
both styles of input. See the sketch below.
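
As a CLIF sketch (illustrative functions, not from this test file), both
of the following should now lower identically, because the egraph moves
the `bnot` operand to the right before the backend sees it:

function %andn_lhs(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = bnot v0
    v3 = band v2, v1
    return v3
}

function %andn_rhs(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
    v2 = bnot v1
    v3 = band v0, v2
    return v3
}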

* Add commutative lowering rules

* Update cranelift/codegen/src/isa/x64/lower.isle

Co-authored-by: Jamey Sharp <jamey@minilop.net>

---------

Co-authored-by: Jamey Sharp <jamey@minilop.net>
2023-02-06 13:53:40 -06:00

test compile precise-output
set unwind_info=false
target riscv64
function %a(i8) -> i8 {
block0(v0: i8):
v1 = bitrev v0
return v1
}
; block0:
; mv a3,a0
; brev8 a0,a3##tmp=t2 tmp2=a2 step=a1 ty=i8
; ret
function %a(i16) -> i16 {
block0(v0: i16):
v1 = bitrev v0
return v1
}
; block0:
; mv a7,a0
; brev8 a2,a7##tmp=t2 tmp2=a0 step=a1 ty=i16
; rev8 a4,a2##step=a6 tmp=a5
; srli a0,a4,48
; ret
function %a(i32) -> i32 {
block0(v0: i32):
v1 = bitrev v0
return v1
}
; block0:
; mv a7,a0
; brev8 a2,a7##tmp=t2 tmp2=a0 step=a1 ty=i32
; rev8 a4,a2##step=a6 tmp=a5
; srli a0,a4,32
; ret
function %a(i64) -> i64 {
block0(v0: i64):
v1 = bitrev v0
return v1
}
; block0:
; mv a6,a0
; rev8 t2,a6##step=a1 tmp=a0
; brev8 a0,t2##tmp=a3 tmp2=a4 step=a5 ty=i64
; ret
function %a(i128) -> i128 {
block0(v0: i128):
v1 = bitrev v0
return v1
}
; block0:
; mv a3,a0
; mv a7,a1
; rev8 a0,a3##step=a2 tmp=a1
; brev8 a1,a0##tmp=a4 tmp2=a5 step=a6 ty=i64
; mv a3,a7
; rev8 t4,a3##step=t1 tmp=t0
; brev8 a0,t4##tmp=a4 tmp2=a3 step=a2 ty=i64
; ret
function %b(i8) -> i8 {
block0(v0: i8):
v1 = clz v0
return v1
}
; block0:
; mv a2,a0
; clz a0,a2##ty=i8 tmp=t2 step=a1
; ret
function %b(i16) -> i16 {
block0(v0: i16):
v1 = clz v0
return v1
}
; block0:
; mv a2,a0
; clz a0,a2##ty=i16 tmp=t2 step=a1
; ret
function %b(i32) -> i32 {
block0(v0: i32):
v1 = clz v0
return v1
}
; block0:
; mv a2,a0
; clz a0,a2##ty=i32 tmp=t2 step=a1
; ret
function %b(i64) -> i64 {
block0(v0: i64):
v1 = clz v0
return v1
}
; block0:
; mv a2,a0
; clz a0,a2##ty=i64 tmp=t2 step=a1
; ret
function %b(i128) -> i128 {
block0(v0: i128):
v1 = clz v0
return v1
}
; block0:
; mv t0,a1
; clz a2,t0##ty=i64 tmp=a3 step=a1
; clz a6,a0##ty=i64 tmp=a4 step=a5
; li t3,64
; select_reg t0,a6,zero##condition=(t3 eq a2)
; add a0,a2,t0
; li a1,0
; ret
function %c(i8) -> i8 {
block0(v0: i8):
v1 = cls v0
return v1
}
; block0:
; sext.b t2,a0
; not a1,a0
; select_reg a3,a1,a0##condition=(t2 slt zero)
; clz a7,a3##ty=i8 tmp=a5 step=a6
; addi a0,a7,-1
; ret
function %c(i16) -> i16 {
block0(v0: i16):
v1 = cls v0
return v1
}
; block0:
; sext.h t2,a0
; not a1,a0
; select_reg a3,a1,a0##condition=(t2 slt zero)
; clz a7,a3##ty=i16 tmp=a5 step=a6
; addi a0,a7,-1
; ret
function %c(i32) -> i32 {
block0(v0: i32):
v1 = cls v0
return v1
}
; block0:
; sext.w t2,a0
; not a1,a0
; select_reg a3,a1,a0##condition=(t2 slt zero)
; clz a7,a3##ty=i32 tmp=a5 step=a6
; addi a0,a7,-1
; ret
function %c(i64) -> i64 {
block0(v0: i64):
v1 = cls v0
return v1
}
; block0:
; not t2,a0
; select_reg a1,t2,a0##condition=(a0 slt zero)
; clz a5,a1##ty=i64 tmp=a3 step=a4
; addi a0,a5,-1
; ret
function %c(i128) -> i128 {
block0(v0: i128):
v1 = cls v0
return v1
}
; block0:
; not a2,a0
; select_reg a2,a2,a0##condition=(a1 slt zero)
; not a4,a1
; select_reg a6,a4,a1##condition=(a1 slt zero)
; clz t0,a6##ty=i64 tmp=t3 step=t4
; clz a1,a2##ty=i64 tmp=t2 step=a0
; li a3,64
; select_reg a5,a1,zero##condition=(a3 eq t0)
; add a7,t0,a5
; li t4,0
; addi a0,a7,-1
; li a1,0
; ret
function %d(i8) -> i8 {
block0(v0: i8):
v1 = ctz v0
return v1
}
; block0:
; mv a2,a0
; ctz a0,a2##ty=i8 tmp=t2 step=a1
; ret
function %d(i16) -> i16 {
block0(v0: i16):
v1 = ctz v0
return v1
}
; block0:
; mv a2,a0
; ctz a0,a2##ty=i16 tmp=t2 step=a1
; ret
function %d(i32) -> i32 {
block0(v0: i32):
v1 = ctz v0
return v1
}
; block0:
; mv a2,a0
; ctz a0,a2##ty=i32 tmp=t2 step=a1
; ret
function %d(i64) -> i64 {
block0(v0: i64):
v1 = ctz v0
return v1
}
; block0:
; mv a2,a0
; ctz a0,a2##ty=i64 tmp=t2 step=a1
; ret
function %d(i128) -> i128 {
block0(v0: i128):
v1 = ctz v0
return v1
}
; block0:
; mv t0,a0
; ctz a2,t0##ty=i64 tmp=a0 step=a3
; ctz a6,a1##ty=i64 tmp=a4 step=a5
; li t3,64
; select_reg t0,a6,zero##condition=(t3 eq a2)
; add a0,a2,t0
; li a1,0
; ret
function %d(i128) -> i128 {
block0(v0: i128):
v1 = popcnt v0
return v1
}
; block0:
; mv t3,a0
; popcnt a2,t3##ty=i64 tmp=a0 step=a3
; popcnt a6,a1##ty=i64 tmp=a4 step=a5
; add a0,a2,a6
; li a1,0
; ret
function %d(i64) -> i64 {
block0(v0: i64):
v1 = popcnt v0
return v1
}
; block0:
; mv a2,a0
; popcnt a0,a2##ty=i64 tmp=t2 step=a1
; ret
function %d(i32) -> i32 {
block0(v0: i32):
v1 = popcnt v0
return v1
}
; block0:
; mv a2,a0
; popcnt a0,a2##ty=i32 tmp=t2 step=a1
; ret
function %d(i16) -> i16 {
block0(v0: i16):
v1 = popcnt v0
return v1
}
; block0:
; mv a2,a0
; popcnt a0,a2##ty=i16 tmp=t2 step=a1
; ret
function %d(i8) -> i8 {
block0(v0: i8):
v1 = popcnt v0
return v1
}
; block0:
; mv a2,a0
; popcnt a0,a2##ty=i8 tmp=t2 step=a1
; ret
function %bnot_i32(i32) -> i32 {
block0(v0: i32):
v1 = bnot v0
return v1
}
; block0:
; not a0,a0
; ret
function %bnot_i64(i64) -> i64 {
block0(v0: i64):
v1 = bnot v0
return v1
}
; block0:
; not a0,a0
; ret
function %bnot_i64_with_shift(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = ishl.i64 v0, v1
v3 = bnot v2
return v3
}
; block0:
; slli a0,a0,3
; not a0,a0
; ret
function %bnot_i128(i128) -> i128 {
block0(v0: i128):
v1 = bnot v0
return v1
}
; block0:
; not a0,a0
; not a1,a1
; ret
function %band_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = band v0, v1
return v2
}
; block0:
; and a0,a0,a1
; ret
function %band_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band v0, v1
return v2
}
; block0:
; and a0,a0,a1
; ret
function %band_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = band v0, v1
return v2
}
; block0:
; and a0,a0,a2
; and a1,a1,a3
; ret
function %band_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = band v0, v1
return v2
}
; block0:
; andi a0,a0,3
; ret
function %band_i64_constant2(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = band v1, v0
return v2
}
; block0:
; andi a0,a0,3
; ret
function %band_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = band v0, v3
return v4
}
; block0:
; slli a1,a1,3
; and a0,a0,a1
; ret
function %band_i64_constant_shift2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = band v3, v0
return v4
}
; block0:
; slli a1,a1,3
; and a0,a1,a0
; ret
function %bor_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bor v0, v1
return v2
}
; block0:
; or a0,a0,a1
; ret
function %bor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor v0, v1
return v2
}
; block0:
; or a0,a0,a1
; ret
function %bor_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bor v0, v1
return v2
}
; block0:
; or a0,a0,a2
; or a1,a1,a3
; ret
function %bor_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bor v0, v1
return v2
}
; block0:
; ori a0,a0,3
; ret
function %bor_i64_constant2(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bor v1, v0
return v2
}
; block0:
; ori a0,a0,3
; ret
function %bor_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bor v0, v3
return v4
}
; block0:
; slli a1,a1,3
; or a0,a0,a1
; ret
function %bor_i64_constant_shift2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bor v3, v0
return v4
}
; block0:
; slli a1,a1,3
; or a0,a1,a0
; ret
function %bxor_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor v0, v1
return v2
}
; block0:
; xor a0,a0,a1
; ret
function %bxor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor v0, v1
return v2
}
; block0:
; xor a0,a0,a1
; ret
function %bxor_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor v0, v1
return v2
}
; block0:
; xor a0,a0,a2
; xor a1,a1,a3
; ret
function %bxor_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bxor v0, v1
return v2
}
; block0:
; xori a0,a0,3
; ret
function %bxor_i64_constant2(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bxor v1, v0
return v2
}
; block0:
; xori a0,a0,3
; ret
function %bxor_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bxor v0, v3
return v4
}
; block0:
; slli a1,a1,3
; xor a0,a0,a1
; ret
function %bxor_i64_constant_shift2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bxor v3, v0
return v4
}
; block0:
; slli a1,a1,3
; xor a0,a1,a0
; ret
function %band_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = band_not v0, v1
return v2
}
; block0:
; not a1,a1
; and a0,a0,a1
; ret
function %band_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band_not v0, v1
return v2
}
; block0:
; not a1,a1
; and a0,a0,a1
; ret
function %band_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = band_not v0, v1
return v2
}
; block0:
; not a4,a2
; not a6,a3
; and a0,a0,a4
; and a1,a1,a6
; ret
function %band_not_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 4
v2 = band_not v0, v1
return v2
}
; block0:
; li a1,4
; not a2,a1
; and a0,a0,a2
; ret
function %band_not_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 4
v3 = ishl.i64 v1, v2
v4 = band_not v0, v3
return v4
}
; block0:
; slli a2,a1,4
; not a2,a2
; and a0,a0,a2
; ret
function %bor_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bor_not v0, v1
return v2
}
; block0:
; not a1,a1
; or a0,a0,a1
; ret
function %bor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor_not v0, v1
return v2
}
; block0:
; not a1,a1
; or a0,a0,a1
; ret
function %bor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bor_not v0, v1
return v2
}
; block0:
; not a4,a2
; not a6,a3
; or a0,a0,a4
; or a1,a1,a6
; ret
function %bor_not_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 4
v2 = bor_not v0, v1
return v2
}
; block0:
; li a1,4
; not a2,a1
; or a0,a0,a2
; ret
function %bor_not_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 4
v3 = ishl.i64 v1, v2
v4 = bor_not v0, v3
return v4
}
; block0:
; slli a2,a1,4
; not a2,a2
; or a0,a0,a2
; ret
function %bxor_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor_not v0, v1
return v2
}
; block0:
; not a1,a1
; xor a0,a0,a1
; ret
function %bxor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor_not v0, v1
return v2
}
; block0:
; not a1,a1
; xor a0,a0,a1
; ret
function %bxor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor_not v0, v1
return v2
}
; block0:
; not a4,a2
; not a6,a3
; xor a0,a0,a4
; xor a1,a1,a6
; ret
function %bxor_not_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 4
v2 = bxor_not v0, v1
return v2
}
; block0:
; li a1,4
; not a2,a1
; xor a0,a0,a2
; ret
function %bxor_not_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 4
v3 = ishl.i64 v1, v2
v4 = bxor_not v0, v3
return v4
}
; block0:
; slli a2,a1,4
; not a2,a2
; xor a0,a0,a2
; ret
function %ishl_i128_i8(i128, i8) -> i128 {
block0(v0: i128, v1: i8):
v2 = ishl.i128 v0, v1
return v2
}
; block0:
; andi a3,a2,63
; li a4,64
; sub a5,a4,a3
; sll a7,a0,a3
; srl t4,a0,a5
; select_reg t1,zero,t4##condition=(a3 eq zero)
; sll a0,a1,a3
; or a3,t1,a0
; li a4,64
; andi a6,a2,127
; select_reg a0,zero,a7##condition=(a6 uge a4)
; select_reg a1,a7,a3##condition=(a6 uge a4)
; ret
function %ishl_i128_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = ishl.i128 v0, v1
return v2
}
; block0:
; andi a3,a2,63
; li a4,64
; sub a6,a4,a3
; sll t3,a0,a3
; srl t0,a0,a6
; select_reg t2,zero,t0##condition=(a3 eq zero)
; sll a1,a1,a3
; or a3,t2,a1
; li a5,64
; andi a7,a2,127
; select_reg a0,zero,t3##condition=(a7 uge a5)
; select_reg a1,t3,a3##condition=(a7 uge a5)
; ret
function %ushr_i128_i8(i128, i8) -> i128 {
block0(v0: i128, v1: i8):
v2 = ushr.i128 v0, v1
return v2
}
; block0:
; andi a3,a2,63
; li a4,64
; sub a5,a4,a3
; sll a7,a1,a5
; select_reg t4,zero,a7##condition=(a3 eq zero)
; srl t1,a0,a3
; or a0,t4,t1
; li a4,64
; srl a5,a1,a3
; andi a6,a2,127
; select_reg a0,a5,a0##condition=(a6 uge a4)
; select_reg a1,zero,a5##condition=(a6 uge a4)
; ret
function %ushr_i128_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = ushr.i128 v0, v1
return v2
}
; block0:
; andi a3,a2,63
; li a4,64
; sub a6,a4,a3
; sll t3,a1,a6
; select_reg t0,zero,t3##condition=(a3 eq zero)
; srl t2,a0,a3
; or a5,t0,t2
; li a4,64
; srl a6,a1,a3
; andi a7,a2,127
; select_reg a0,a6,a5##condition=(a7 uge a4)
; select_reg a1,zero,a6##condition=(a7 uge a4)
; ret
function %sshr_i128_i8(i128, i8) -> i128 {
block0(v0: i128, v1: i8):
v2 = sshr.i128 v0, v1
return v2
}
; block0:
; andi a3,a2,63
; li a4,64
; sub a5,a4,a3
; sll a7,a1,a5
; select_reg t4,zero,a7##condition=(a3 eq zero)
; srl t1,a0,a3
; or a0,t4,t1
; li a4,64
; sra a4,a1,a3
; li a6,-1
; select_reg t3,a6,zero##condition=(a1 slt zero)
; li t0,64
; andi t2,a2,127
; select_reg a0,a4,a0##condition=(t2 uge t0)
; select_reg a1,t3,a4##condition=(t2 uge t0)
; ret
function %sshr_i128_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = sshr.i128 v0, v1
return v2
}
; block0:
; andi a3,a2,63
; li a4,64
; sub a6,a4,a3
; sll t3,a1,a6
; select_reg t0,zero,t3##condition=(a3 eq zero)
; srl t2,a0,a3
; or a4,t0,t2
; li a5,64
; sra a5,a1,a3
; li a7,-1
; select_reg t4,a7,zero##condition=(a1 slt zero)
; li t1,64
; andi a1,a2,127
; select_reg a0,a5,a4##condition=(a1 uge t1)
; select_reg a1,t4,a5##condition=(a1 uge t1)
; ret