diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 60aa7fc45c..67efbc6dd2 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -280,6 +280,9 @@ ;; ish". This instruction is sequentially consistent. (Fence) + ;; Consumption of speculative data barrier. + (Csdb) + ;; FPU move. Note that this is distinct from a vector-register ;; move; moving just 64 bits seems to be significantly faster. (FpuMove64 diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index d7c1c8949c..b504e675be 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1656,6 +1656,9 @@ impl MachInstEmit for Inst { &Inst::Fence {} => { sink.put4(enc_dmb_ish()); // dmb ish } + &Inst::Csdb {} => { + sink.put4(0xd503229f); + } &Inst::FpuMove64 { rd, rn } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); @@ -2910,6 +2913,8 @@ impl MachInstEmit for Inst { rm: ridx, }; inst.emit(&[], sink, emit_info, state); + // Prevent any data value speculation. + Inst::Csdb.emit(&[], sink, emit_info, state); // Load address of jump table let inst = Inst::Adr { rd: rtmp1, off: 16 }; diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 4217c13810..95971c52c3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -40,6 +40,7 @@ fn test_aarch64_binemit() { insns.push((Inst::Ret { rets: vec![] }, "C0035FD6", "ret")); insns.push((Inst::Nop0, "", "nop-zero-len")); insns.push((Inst::Nop4, "1F2003D5", "nop")); + insns.push((Inst::Csdb, "9F2203D5", "csdb")); insns.push(( Inst::Udf { use_allocated_encoding: false, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 05c51459ce..74a094d9f5 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -703,7 +703,7 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(rn); collector.reg_use(rt); } - &Inst::Fence {} => {} + &Inst::Fence {} | &Inst::Csdb {} => {} &Inst::FpuMove64 { rd, rn } => { collector.reg_def(rd); collector.reg_use(rn); @@ -1679,6 +1679,9 @@ impl Inst { &Inst::Fence {} => { format!("dmb ish") } + &Inst::Csdb {} => { + format!("csdb") + } &Inst::FpuMove64 { rd, rn } => { let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs); @@ -2545,6 +2548,8 @@ impl Inst { format!( concat!( "b.hs {} ; ", + "csel {}, xzr, {}, hs ; ", + "csdb ; ", "adr {}, pc+16 ; ", "ldrsw {}, [{}, {}, LSL 2] ; ", "add {}, {}, {} ; ", @@ -2552,10 +2557,12 @@ impl Inst { "jt_entries {:?}" ), default_target, + rtmp2, + ridx, rtmp1, rtmp2, rtmp1, - ridx, + rtmp2, rtmp1, rtmp1, rtmp2, diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 1272197002..3570286c04 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -375,6 +375,10 @@ pub(crate) fn lower_insn_to_regs>( op, ty ))); } + + if op == Opcode::SelectifSpectreGuard { + ctx.emit(Inst::Csdb); + } } Opcode::Bitselect | Opcode::Vselect => implemented_in_isle(ctx), diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 9c225a0bb8..702b7d2977 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -358,25 +358,26 @@ mod test { let code = result.buffer.data(); // 0: 7100081f cmp w0, #0x2 - // 4: 54000102 b.cs 0x24 // b.hs, b.nlast + // 4: 54000122 b.cs 0x28 // b.hs, b.nlast // 8: 9a8023e9 csel x9, xzr, x0, cs // cs = hs, nlast - // c: 10000088 adr x8, 0x1c - // 10: b8a95909 ldrsw x9, [x8, w9, uxtw #2] - // 14: 8b090108 add x8, x8, x9 - // 18: d61f0100 br x8 - // 1c: 00000010 udf #16 - // 20: 00000018 udf #24 - // 24: d2800060 mov x0, #0x3 // #3 - // 28: d65f03c0 ret - // 2c: d2800020 mov x0, #0x1 // #1 - // 30: d65f03c0 ret - // 34: d2800040 mov x0, #0x2 // #2 - // 38: d65f03c0 ret + // c: d503229f csdb + // 10: 10000088 adr x8, 0x1c + // 14: b8a95909 ldrsw x9, [x8, w9, uxtw #2] + // 18: 8b090108 add x8, x8, x9 + // 1c: d61f0100 br x8 + // 20: 00000010 udf #16 + // 24: 00000018 udf #24 + // 28: d2800060 mov x0, #0x3 // #3 + // 2c: d65f03c0 ret + // 30: d2800020 mov x0, #0x1 // #1 + // 34: d65f03c0 ret + // 38: d2800040 mov x0, #0x2 // #2 + // 3c: d65f03c0 ret let golden = vec![ - 31, 8, 0, 113, 2, 1, 0, 84, 233, 35, 128, 154, 136, 0, 0, 16, 9, 89, 169, 184, 8, 1, 9, - 139, 0, 1, 31, 214, 16, 0, 0, 0, 24, 0, 0, 0, 96, 0, 128, 210, 192, 3, 95, 214, 32, 0, - 128, 210, 192, 3, 95, 214, 64, 0, 128, 210, 192, 3, 95, 214, + 31, 8, 0, 113, 34, 1, 0, 84, 233, 35, 128, 154, 159, 34, 3, 213, 136, 0, 0, 16, 9, 89, + 169, 184, 8, 1, 9, 139, 0, 1, 31, 214, 16, 0, 0, 0, 24, 0, 0, 0, 96, 0, 128, 210, 192, + 3, 95, 214, 32, 0, 128, 210, 192, 3, 95, 214, 64, 0, 128, 210, 192, 3, 95, 214, ]; assert_eq!(code, &golden[..]); diff --git a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif index 6fd8d2fab7..c8056c3d9e 100644 --- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif @@ -24,6 +24,7 @@ block0(v0: i64, v1: i32): ; subs xzr, x10, x11 ; movz x14, #0 ; csel x0, x14, x13, hi +; csdb ; ret ; block2: ; udf #0xc11f @@ -46,7 +47,7 @@ block0(v0: i64, v1: i32): ; subs xzr, x8, #65536 ; movz x11, #0 ; csel x0, x11, x10, hi +; csdb ; ret ; block2: ; udf #0xc11f - diff --git a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif index b60795cccd..efd0697d82 100644 --- a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif +++ b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif @@ -32,7 +32,7 @@ block5(v5: i32): ; block0: ; emit_island 36 ; subs wzr, w0, #3 -; b.hs label1 ; adr x15, pc+16 ; ldrsw x1, [x15, x0, LSL 2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] +; b.hs label1 ; csel x1, xzr, x0, hs ; csdb ; adr x15, pc+16 ; ldrsw x1, [x15, x1, LSL 2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] ; block1: ; movz x5, #4 ; b label2