diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index d4ae442085..303b1bfaeb 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -1395,6 +1395,7 @@ fn define_alu(
     let rotr = shared.by_name("rotr");
     let rotr_imm = shared.by_name("rotr_imm");
     let selectif = shared.by_name("selectif");
+    let selectif_spectre_guard = shared.by_name("selectif_spectre_guard");
     let sshr = shared.by_name("sshr");
     let sshr_imm = shared.by_name("sshr_imm");
     let trueff = shared.by_name("trueff");
@@ -1608,6 +1609,11 @@ fn define_alu(
 
     // Conditional move (a.k.a integer select).
     e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW));
+    // A Spectre guard integer select is exactly like `selectif`, but it is
+    // not associated with any other legalization rules and is not
+    // recognized by any optimization passes, so it is guaranteed to
+    // arrive here unmodified and in its original place.
+    e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW));
 }
 
 #[inline(never)]
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index b191ae554c..fb91ae0ae9 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -1748,6 +1748,34 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    ig.push(
+        Inst::new(
+            "selectif_spectre_guard",
+            r#"
+        Conditional select intended for Spectre guards.
+
+        This operation is semantically equivalent to a selectif instruction.
+        However, it is guaranteed not to be removed or otherwise altered by
+        any optimization pass, and is guaranteed to result in a
+        conditional-move instruction, never a branch-based lowering. As such,
+        it is suitable for use when producing Spectre guards. For example, a
+        bounds check may guard against unsafe speculation past its
+        conditional branch by also routing the address or index to be
+        accessed through a conditional move gated on the same condition.
+        Because no Spectre-vulnerable processors are known to speculate on
+        conditional move instructions, the guard is guaranteed to pick the
+        correct input. If the input selected on the out-of-bounds path is a
+        "safe" value, for example a null pointer that faults when accessed
+        speculatively, this ensures that no Spectre vulnerability will
+        exist.
+        "#,
+            &formats.int_select,
+        )
+        .operands_in(vec![cc, flags, x, y])
+        .operands_out(vec![a])
+        .other_side_effects(true),
+    );
+
     let c = &Operand::new("c", Any).with_doc("Controlling value to test");
     ig.push(
         Inst::new(
diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs
index 3d15387896..88db45d9ed 100644
--- a/cranelift/codegen/meta/src/shared/settings.rs
+++ b/cranelift/codegen/meta/src/shared/settings.rs
@@ -264,5 +264,23 @@ pub(crate) fn define() -> SettingGroup {
         true,
     );
 
+    // Spectre options.
+
+    settings.add_bool(
+        "enable_heap_access_spectre_mitigation",
+        r#"
+        Enable Spectre mitigation on heap bounds checks.
+
+        This is a no-op for any heap that needs no bounds checks; e.g.,
+        if the limit is static and the guard region is large enough that
+        the index cannot reach past it.
+
+        This option is enabled by default because it is highly
+        recommended for secure sandboxing. The embedder should consider
+        the security implications carefully before disabling this option.
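+
+        With this option enabled, the legalized bounds check also routes
+        the computed address through a conditional move gated on the same
+        comparison, so a misspeculated access sees a null pointer instead
+        of an out-of-bounds address. As an illustrative sketch only (the
+        value numbers, types, and constant are schematic, not normative
+        output), a guarded check of a static heap with limit 65536 takes
+        roughly this shape:
+
+            v2 = uextend.i64 v1               ; offset widened to addr type
+            v3 = icmp_imm ugt v1, 65536       ; explicit bounds check
+            trapnz v3, heap_oob
+            v4 = iconst.i32 65536             ; limit for the guard compare
+            v5 = iadd v0, v2                  ; the real address
+            v6 = iconst.i64 0                 ; speculatively safe null
+            v7 = ifcmp v1, v4
+            v8 = selectif_spectre_guard.i64 ugt v7, v6, v5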
+ "#, + true, + ); + settings.build() } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 3ed3b08108..efcee9b36d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1023,7 +1023,7 @@ pub(crate) fn lower_insn_to_regs>( // Nothing. } - Opcode::Select | Opcode::Selectif => { + Opcode::Select | Opcode::Selectif | Opcode::SelectifSpectreGuard => { let cond = if op == Opcode::Select { let (cmp_op, narrow_mode) = if ty_bits(ctx.input_ty(insn, 0)) > 32 { (ALUOp::SubS64, NarrowValueMode::ZeroExtend64) diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index 663736793b..26a0640f08 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -66,19 +66,14 @@ fn dynamic_addr( // Start with the bounds check. Trap if `offset + access_size > bound`. let bound = pos.ins().global_value(offset_ty, bound_gv); - let oob; - if access_size == 1 { + let (cc, lhs, bound) = if access_size == 1 { // `offset > bound - 1` is the same as `offset >= bound`. - oob = pos - .ins() - .icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound); + (IntCC::UnsignedGreaterThanOrEqual, offset, bound) } else if access_size <= min_size { // We know that bound >= min_size, so here we can compare `offset > bound - access_size` // without wrapping. let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64)); - oob = pos - .ins() - .icmp(IntCC::UnsignedGreaterThan, offset, adj_bound); + (IntCC::UnsignedGreaterThan, offset, adj_bound) } else { // We need an overflow check for the adjusted offset. let access_size_val = pos.ins().iconst(offset_ty, access_size as i64); @@ -88,13 +83,27 @@ fn dynamic_addr( overflow, ir::TrapCode::HeapOutOfBounds, ); - oob = pos - .ins() - .icmp(IntCC::UnsignedGreaterThan, adj_offset, bound); - } + (IntCC::UnsignedGreaterThan, adj_offset, bound) + }; + let oob = pos.ins().icmp(cc, lhs, bound); pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); - compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func); + let spectre_oob_comparison = if isa.flags().enable_heap_access_spectre_mitigation() { + Some((cc, lhs, bound)) + } else { + None + }; + + compute_addr( + isa, + inst, + heap, + addr_ty, + offset, + offset_ty, + pos.func, + spectre_oob_comparison, + ); } /// Expand a `heap_addr` for a static heap. @@ -146,20 +155,35 @@ fn static_addr( // With that we have an optimization here where with 32-bit offsets and // `bound - access_size >= 4GB` we can omit a bounds check. let limit = bound - access_size; + let mut spectre_oob_comparison = None; if offset_ty != ir::types::I32 || limit < 0xffff_ffff { - let oob = if limit & 1 == 1 { + let (cc, lhs, limit_imm) = if limit & 1 == 1 { // Prefer testing `offset >= limit - 1` when limit is odd because an even number is // likely to be a convenient constant on ARM and other RISC architectures. 
-            pos.ins()
-                .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit as i64 + 1)
+            let limit = limit as i64 + 1;
+            (IntCC::UnsignedGreaterThanOrEqual, offset, limit)
         } else {
-            pos.ins()
-                .icmp_imm(IntCC::UnsignedGreaterThan, offset, limit as i64)
+            let limit = limit as i64;
+            (IntCC::UnsignedGreaterThan, offset, limit)
         };
+        let oob = pos.ins().icmp_imm(cc, lhs, limit_imm);
         pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
+        if isa.flags().enable_heap_access_spectre_mitigation() {
+            let limit = pos.ins().iconst(offset_ty, limit_imm);
+            spectre_oob_comparison = Some((cc, lhs, limit));
+        }
     }
 
-    compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func);
+    compute_addr(
+        isa,
+        inst,
+        heap,
+        addr_ty,
+        offset,
+        offset_ty,
+        pos.func,
+        spectre_oob_comparison,
+    );
 }
 
 /// Emit code for the base address computation of a `heap_addr` instruction.
@@ -171,6 +195,11 @@ fn compute_addr(
     mut offset: ir::Value,
     offset_ty: ir::Type,
     func: &mut ir::Function,
+    // If we are performing Spectre mitigation with conditional selects, the
+    // values to compare and the condition code that indicates an out-of-bounds
+    // condition; on this condition, the conditional move will choose a
+    // speculatively safe address (a zero / null pointer) instead.
+    spectre_oob_comparison: Option<(IntCC, ir::Value, ir::Value)>,
 ) {
     let mut pos = FuncCursor::new(func).at_inst(inst);
     pos.use_srcloc(inst);
@@ -198,5 +227,15 @@ fn compute_addr(
         pos.ins().global_value(addr_ty, base_gv)
     };
 
-    pos.func.dfg.replace(inst).iadd(base, offset);
+    if let Some((cc, a, b)) = spectre_oob_comparison {
+        let final_addr = pos.ins().iadd(base, offset);
+        let zero = pos.ins().iconst(addr_ty, 0);
+        let flags = pos.ins().ifcmp(a, b);
+        pos.func
+            .dfg
+            .replace(inst)
+            .selectif_spectre_guard(addr_ty, cc, flags, zero, final_addr);
+    } else {
+        pos.func.dfg.replace(inst).iadd(base, offset);
+    }
 }
diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs
index bd6d7bc360..4879d5c2a8 100644
--- a/cranelift/codegen/src/settings.rs
+++ b/cranelift/codegen/src/settings.rs
@@ -399,6 +399,7 @@
 emit_all_ones_funcaddrs = false
 enable_probestack = true
 probestack_func_adjusts_sp = false
 enable_jump_tables = true
+enable_heap_access_spectre_mitigation = true
 "#
         );
         assert_eq!(f.opt_level(), super::OptLevel::None);
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif
index 5fb080f8a6..d47a880320 100644
--- a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif
+++ b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif
@@ -1,4 +1,5 @@
 test legalizer
+set enable_heap_access_spectre_mitigation=false
 target x86_64
 
 ; Test legalization for various forms of heap addresses.
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif
index 78d1796d00..2c99d285b2 100644
--- a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif
+++ b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif
@@ -1,5 +1,6 @@
 ; Test the legalization of memory objects.
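+; Spectre mitigation is disabled below so that the branch-based
+; legalizations expected in this file stay unchanged.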
 test legalizer
+set enable_heap_access_spectre_mitigation=false
 target x86_64
 
 ; regex: V=v\d+
diff --git a/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif
index ab97cb5190..6ceba929e9 100644
--- a/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif
@@ -1,4 +1,5 @@
 test compile
+set enable_heap_access_spectre_mitigation=true
 target aarch64
 
 function %dynamic_heap_check(i64 vmctx, i32) -> i64 {
     gv0 = vmctx
@@ -11,20 +12,23 @@ block0(v0: i64, v1: i32):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldur w2, [x0]
-; nextln: add w2, w2, #0
-; nextln: subs wzr, w1, w2
-; nextln: b.ls label1 ; b label2
-; nextln: Block 1:
-; check: add x0, x0, x1, UXTW
-; nextln: mov sp, fp
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
-; nextln: Block 2:
-; check: udf
-
+; check: Block 0:
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: ldur w2, [x0]
+; nextln: add w2, w2, #0
+; nextln: subs wzr, w1, w2
+; nextln: b.ls label1 ; b label2
+; check: Block 1:
+; check: add x0, x0, x1, UXTW
+; nextln: subs wzr, w1, w2
+; nextln: movz x1, #0
+; nextln: csel x0, x1, x0, hi
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+; check: Block 2:
+; check: udf
 
 function %static_heap_check(i64 vmctx, i32) -> i64 {
     gv0 = vmctx
@@ -35,15 +39,18 @@ block0(v0: i64, v1: i32):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs wzr, w1, #65536
-; nextln: b.ls label1 ; b label2
-; nextln: Block 1:
-; check: add x0, x0, x1, UXTW
-; nextln: mov sp, fp
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
-; nextln: Block 2:
-; check: udf
-
+; check: Block 0:
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: subs wzr, w1, #65536
+; nextln: b.ls label1 ; b label2
+; check: Block 1:
+; check: add x0, x0, x1, UXTW
+; nextln: subs wzr, w1, #65536
+; nextln: movz x1, #0
+; nextln: csel x0, x1, x0, hi
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+; check: Block 2:
+; check: udf