aarch64: Add basic i128 bit ops to the AArch64 backend
Currently we basically just use a two-instruction version of the same i64 ops. IMMLogic doesn't really support multiple register inputs, so it's left as a TODO for future optimizations.
This commit is contained in:
@@ -661,14 +661,31 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::Bnot => {
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let out_regs = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ty.unwrap();
|
||||
if !ty.is_vector() {
|
||||
if ty == I128 {
|
||||
// TODO: We can merge this block with the one below once we support immlogic here
|
||||
let in_regs = put_input_in_regs(ctx, inputs[0]);
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op: ALUOp::OrrNot64,
|
||||
rd: out_regs.regs()[0],
|
||||
rn: zero_reg(),
|
||||
rm: in_regs.regs()[0],
|
||||
});
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op: ALUOp::OrrNot64,
|
||||
rd: out_regs.regs()[1],
|
||||
rn: zero_reg(),
|
||||
rm: in_regs.regs()[1],
|
||||
});
|
||||
} else if !ty.is_vector() {
|
||||
let rd = out_regs.only_reg().unwrap();
|
||||
let rm = put_input_in_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
|
||||
// NOT rd, rm ==> ORR_NOT rd, zero, rm
|
||||
ctx.emit(alu_inst_immlogic(alu_op, rd, zero_reg(), rm));
|
||||
} else {
|
||||
let rd = out_regs.only_reg().unwrap();
|
||||
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
@@ -685,9 +702,36 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::BandNot
|
||||
| Opcode::BorNot
|
||||
| Opcode::BxorNot => {
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let out_regs = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ty.unwrap();
|
||||
if !ty.is_vector() {
|
||||
if ty == I128 {
|
||||
// TODO: Support immlogic here
|
||||
let lhs = put_input_in_regs(ctx, inputs[0]);
|
||||
let rhs = put_input_in_regs(ctx, inputs[1]);
|
||||
let alu_op = match op {
|
||||
Opcode::Band => ALUOp::And64,
|
||||
Opcode::Bor => ALUOp::Orr64,
|
||||
Opcode::Bxor => ALUOp::Eor64,
|
||||
Opcode::BandNot => ALUOp::AndNot64,
|
||||
Opcode::BorNot => ALUOp::OrrNot64,
|
||||
Opcode::BxorNot => ALUOp::EorNot64,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op,
|
||||
rd: out_regs.regs()[0],
|
||||
rn: lhs.regs()[0],
|
||||
rm: rhs.regs()[0],
|
||||
});
|
||||
ctx.emit(Inst::AluRRR {
|
||||
alu_op,
|
||||
rd: out_regs.regs()[1],
|
||||
rn: lhs.regs()[1],
|
||||
rm: rhs.regs()[1],
|
||||
});
|
||||
} else if !ty.is_vector() {
|
||||
let rd = out_regs.only_reg().unwrap();
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
|
||||
let alu_op = match op {
|
||||
@@ -711,7 +755,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let rd = out_regs.only_reg().unwrap();
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
|
||||
@@ -293,3 +293,94 @@ block0:
|
||||
; nextln: sbfx w0, w0, #0, #1
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %bnot_i128(i128) -> i128 {
|
||||
block0(v0: i128):
|
||||
v1 = bnot v0
|
||||
return v1
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: orn x0, xzr, x0
|
||||
; nextln: orn x1, xzr, x1
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %band_i128(i128, i128) -> i128 {
|
||||
block0(v0: i128, v1: i128):
|
||||
v2 = band v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: and x0, x0, x2
|
||||
; nextln: and x1, x1, x3
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %bor_i128(i128, i128) -> i128 {
|
||||
block0(v0: i128, v1: i128):
|
||||
v2 = bor v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: orr x0, x0, x2
|
||||
; nextln: orr x1, x1, x3
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %bxor_i128(i128, i128) -> i128 {
|
||||
block0(v0: i128, v1: i128):
|
||||
v2 = bxor v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: eor x0, x0, x2
|
||||
; nextln: eor x1, x1, x3
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %band_not_i128(i128, i128) -> i128 {
|
||||
block0(v0: i128, v1: i128):
|
||||
v2 = band_not v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: bic x0, x0, x2
|
||||
; nextln: bic x1, x1, x3
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %bor_not_i128(i128, i128) -> i128 {
|
||||
block0(v0: i128, v1: i128):
|
||||
v2 = bor_not v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: orn x0, x0, x2
|
||||
; nextln: orn x1, x1, x3
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
|
||||
function %bxor_not_i128(i128, i128) -> i128 {
|
||||
block0(v0: i128, v1: i128):
|
||||
v2 = bxor_not v0, v1
|
||||
return v2
|
||||
}
|
||||
|
||||
; check: stp fp, lr, [sp, #-16]!
|
||||
; nextln: mov fp, sp
|
||||
; nextln: eon x0, x0, x2
|
||||
; nextln: eon x1, x1, x3
|
||||
; nextln: ldp fp, lr, [sp], #16
|
||||
; nextln: ret
|
||||
135
cranelift/filetests/filetests/runtests/i128-bitops.clif
Normal file
135
cranelift/filetests/filetests/runtests/i128-bitops.clif
Normal file
@@ -0,0 +1,135 @@
|
||||
test run
|
||||
target aarch64
|
||||
; target s390x TODO: Not yet implemented on s390x
|
||||
; target x86_64 TODO: Not yet implemented on x86_64
|
||||
|
||||
; i128 tests
|
||||
; TODO: Clean up these tests when we have native support for i128 immediates in CLIF's parser
|
||||
function %bnot_i128(i64, i64) -> i64, i64 {
|
||||
block0(v0: i64,v1: i64):
|
||||
v2 = iconcat v0, v1
|
||||
|
||||
v3 = bnot v2
|
||||
|
||||
v4, v5 = isplit v3
|
||||
return v4, v5
|
||||
}
|
||||
; run: %bnot_i128(0, 0) == [-1, -1]
|
||||
; run: %bnot_i128(-1, -1) == [0, 0]
|
||||
; run: %bnot_i128(-1, 0) == [0, -1]
|
||||
|
||||
; run: %bnot_i128(0x3F001111_3F001111, 0x21350000_21350000) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF]
|
||||
|
||||
|
||||
function %band_i128(i64, i64, i64, i64) -> i64, i64 {
|
||||
block0(v0: i64,v1: i64,v2: i64,v3: i64):
|
||||
v4 = iconcat v0, v1
|
||||
v5 = iconcat v2, v3
|
||||
|
||||
v6 = band v4, v5
|
||||
|
||||
v7, v8 = isplit v6
|
||||
return v7, v8
|
||||
}
|
||||
; run: %band_i128(0, 0, 0, 0) == [0, 0]
|
||||
; run: %band_i128(-1, -1, 0, 0) == [0, 0]
|
||||
; run: %band_i128(-1, -1, -1, -1) == [-1, -1]
|
||||
; run: %band_i128(-1, -1, 0, -1) == [0, -1]
|
||||
|
||||
; run: %band_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0, 0]
|
||||
; run: %band_i128(0xF1FFFEFE_F1FFFEFE, 0xFEEEFFFF_FEEEFFFF, 0xCEFFEFEF_CEFFEFEF, 0xDFDBFFFF_DFDBFFFF) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF]
|
||||
|
||||
|
||||
function %bor_i128(i64, i64, i64, i64) -> i64, i64 {
|
||||
block0(v0: i64,v1: i64,v2: i64,v3: i64):
|
||||
v4 = iconcat v0, v1
|
||||
v5 = iconcat v2, v3
|
||||
|
||||
v6 = bor v4, v5
|
||||
|
||||
v7, v8 = isplit v6
|
||||
return v7, v8
|
||||
}
|
||||
; run: %bor_i128(0, 0, 0, 0) == [0, 0]
|
||||
; run: %bor_i128(-1, -1, 0, 0) == [-1, -1]
|
||||
; run: %bor_i128(-1, -1, -1, -1) == [-1, -1]
|
||||
; run: %bor_i128(0, 0, 0, -1) == [0, -1]
|
||||
|
||||
; run: %bor_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [-1, -1]
|
||||
; run: %bor_i128(0x80AAAAAA_80AAAAAA, 0x8A8AAAAA_8A8AAAAA, 0x40554444_40554444, 0x54405555_54405555) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF]
|
||||
|
||||
|
||||
function %bxor_i128(i64, i64, i64, i64) -> i64, i64 {
|
||||
block0(v0: i64,v1: i64,v2: i64,v3: i64):
|
||||
v4 = iconcat v0, v1
|
||||
v5 = iconcat v2, v3
|
||||
|
||||
v6 = bxor v4, v5
|
||||
|
||||
v7, v8 = isplit v6
|
||||
return v7, v8
|
||||
}
|
||||
; run: %bxor_i128(0, 0, 0, 0) == [0, 0]
|
||||
; run: %bxor_i128(-1, -1, 0, 0) == [-1, -1]
|
||||
; run: %bxor_i128(-1, -1, -1, -1) == [0, 0]
|
||||
; run: %bxor_i128(-1, -1, 0, -1) == [-1, 0]
|
||||
|
||||
; run: %bxor_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [-1, -1]
|
||||
; run: %bxor_i128(0x8FA50A64_8FA50A64, 0x9440A07D_9440A07D, 0x4F5AE48A_4F5AE48A, 0x4A8A5F82_4A8A5F82) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF]
|
||||
|
||||
|
||||
function %band_not_i128(i64, i64, i64, i64) -> i64, i64 {
|
||||
block0(v0: i64,v1: i64,v2: i64,v3: i64):
|
||||
v4 = iconcat v0, v1
|
||||
v5 = iconcat v2, v3
|
||||
|
||||
v6 = band_not v4, v5
|
||||
|
||||
v7, v8 = isplit v6
|
||||
return v7, v8
|
||||
}
|
||||
; run: %band_not_i128(0, 0, 0, 0) == [0, 0]
|
||||
; run: %band_not_i128(-1, -1, 0, 0) == [-1, -1]
|
||||
; run: %band_not_i128(-1, -1, -1, -1) == [0, 0]
|
||||
; run: %band_not_i128(-1, -1, 0, -1) == [-1, 0]
|
||||
|
||||
; run: %band_not_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0x01234567_89ABCDEF, 0xFEDCBA98_76543210]
|
||||
; run: %band_not_i128(0xF1FFFEFE_F1FFFEFE, 0xFEEEFFFF_FEEEFFFF, 0x31001010_31001010, 0x20240000_20240000) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF]
|
||||
|
||||
|
||||
function %bor_not_i128(i64, i64, i64, i64) -> i64, i64 {
|
||||
block0(v0: i64,v1: i64,v2: i64,v3: i64):
|
||||
v4 = iconcat v0, v1
|
||||
v5 = iconcat v2, v3
|
||||
|
||||
v6 = bor_not v4, v5
|
||||
|
||||
v7, v8 = isplit v6
|
||||
return v7, v8
|
||||
}
|
||||
; run: %bor_not_i128(0, 0, 0, 0) == [-1, -1]
|
||||
; run: %bor_not_i128(-1, -1, 0, 0) == [-1, -1]
|
||||
; run: %bor_not_i128(-1, -1, -1, -1) == [-1, -1]
|
||||
; run: %bor_not_i128(-1, 0, 0, -1) == [-1, 0]
|
||||
|
||||
; run: %bor_not_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0x01234567_89ABCDEF, 0xFEDCBA98_76543210]
|
||||
; run: %bor_not_i128(0x80AAAAAA_80AAAAAA, 0x8A8AAAAA_8A8AAAAA, 0xBFAABBBB_BFAABBBB, 0xABBFAAAA_ABBFAAAA) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF]
|
||||
|
||||
|
||||
function %bxor_not_i128(i64, i64, i64, i64) -> i64, i64 {
|
||||
block0(v0: i64,v1: i64,v2: i64,v3: i64):
|
||||
v4 = iconcat v0, v1
|
||||
v5 = iconcat v2, v3
|
||||
|
||||
v6 = bxor_not v4, v5
|
||||
|
||||
v7, v8 = isplit v6
|
||||
return v7, v8
|
||||
}
|
||||
; run: %bxor_not_i128(0, 0, 0, 0) == [-1, -1]
|
||||
; run: %bxor_not_i128(-1, -1, 0, 0) == [0, 0]
|
||||
; run: %bxor_not_i128(-1, -1, -1, -1) == [-1, -1]
|
||||
; run: %bxor_not_i128(-1, -1, 0, -1) == [0, -1]
|
||||
|
||||
; run: %bxor_not_i128(0x01234567_89ABCDEF, 0xFEDCBA98_76543210, 0xFEDCBA98_76543210, 0x01234567_89ABCDEF) == [0, 0]
|
||||
; run: %bxor_not_i128(0x8FA50A64_8FA50A64, 0x9440A07D_9440A07D, 0xB0A51B75_B0A51B75, 0xB575A07D_B575A07D) == [0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF]
|
||||
Reference in New Issue
Block a user