aarch64 isel: collect_address_addends: correctly handle ExtendOp::UXTW(negative immediate).

The current code doesn't correctly handle the case where `ExtendOp::UXTW` has as source, a constant-producing insn that produces a negative (32-bit) value. Then the value is incorrectly sign-extended to 64 bits (in fact, this has already been done by `ctx.get_constant(insn)`), whereas it needs to be zero extended. The obvious fix, done here, is just to force bits 63:32 of the extension to zero, hence zero-extending it.
2020-12-04 18:29:39 +01:00
parent d07fffeb41
commit 8f34d2dc59
2 changed files with 67 additions and 1 deletions
--- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
@@ -344,3 +344,69 @@ block0(v0: i64, v1: i32):
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+function %f18(i64, i64, i64) -> i32 {
+block0(v0: i64, v1: i64, v2: i64):
+  v3 = iconst.i32 -4098
+  v6 = uextend.i64 v3
+  v5 = sload16.i32 v6+0
+  return v5
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movn w0, #4097
+; nextln: ldrsh x0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f19(i64, i64, i64) -> i32 {
+block0(v0: i64, v1: i64, v2: i64):
+  v3 = iconst.i32 4098
+  v6 = uextend.i64 v3
+  v5 = sload16.i32 v6+0
+  return v5
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x0, #4098
+; nextln: ldrsh x0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f20(i64, i64, i64) -> i32 {
+block0(v0: i64, v1: i64, v2: i64):
+  v3 = iconst.i32 -4098
+  v6 = sextend.i64 v3
+  v5 = sload16.i32 v6+0
+  return v5
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movn w0, #4097
+; nextln: sxtw x0, w0
+; nextln: ldrsh x0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %f21(i64, i64, i64) -> i32 {
+block0(v0: i64, v1: i64, v2: i64):
+  v3 = iconst.i32 4098
+  v6 = sextend.i64 v3
+  v5 = sload16.i32 v6+0
+  return v5
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x0, #4098
+; nextln: sxtw x0, w0
+; nextln: ldrsh x0, [x0]
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret