From e694fb131253c9c06f991937d333bc0513a59d4f Mon Sep 17 00:00:00 2001
From: Chris Fallin
Date: Mon, 29 Jun 2020 14:04:26 -0700
Subject: [PATCH] Spectre mitigation on heap access overflow checks.

This PR adds a conditional move following each heap bounds check and
routes the address to be accessed through it. This conditional move
ensures that even if the branch is mispredicted (the access is actually
out of bounds, but speculation goes down the in-bounds path), the
actually accessed address is zero (a NULL pointer) rather than the
out-of-bounds address.

The mitigation is controlled by a flag that is off by default, but can
be set by the embedding. Note that in order to turn it on by default,
we would need to add conditional-move support to the current x86
backend; this does not appear to be present. Once the deprecated
backend is removed in favor of the new backend, IMHO we should turn
this flag on by default.

Note that the mitigation is unnecessary when we use the "huge heap"
technique on 64-bit systems, in which we allocate a range of virtual
address space such that no 32-bit offset can reach other data. Hence,
this only affects small-heap configurations.
---
 .../codegen/meta/src/isa/x86/encodings.rs     |  6 ++
 .../codegen/meta/src/shared/instructions.rs   | 28 +++++++
 cranelift/codegen/meta/src/shared/settings.rs | 18 +++++
 .../codegen/src/isa/aarch64/lower_inst.rs     |  2 +-
 cranelift/codegen/src/legalizer/heap.rs       | 79 ++++++++++++++-----
 cranelift/codegen/src/settings.rs             |  1 +
 .../filetests/isa/x86/legalize-heaps.clif     |  1 +
 .../filetests/isa/x86/legalize-memory.clif    |  1 +
 .../filetests/vcode/aarch64/heap_addr.clif    | 59 ++++++++------
 9 files changed, 148 insertions(+), 47 deletions(-)

diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index 71a549647c..ca02beea69 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -1355,6 +1355,7 @@ fn define_alu(
     let rotr = shared.by_name("rotr");
     let rotr_imm = shared.by_name("rotr_imm");
     let selectif = shared.by_name("selectif");
+    let selectif_spectre_guard = shared.by_name("selectif_spectre_guard");
     let sshr = shared.by_name("sshr");
     let sshr_imm = shared.by_name("sshr_imm");
     let trueff = shared.by_name("trueff");
@@ -1568,6 +1569,11 @@ fn define_alu(

     // Conditional move (a.k.a integer select).
     e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW));
+
+    // A Spectre-guard integer select is exactly the same as a selectif, but
+    // is not associated with any other legalization rules and is not
+    // recognized by any optimizations, so it must arrive here unmodified
+    // and in its original place.
+    e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW));
 }

 #[inline(never)]
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index be8d65ecc2..5ed3c6857e 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -1748,6 +1748,34 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );

+    ig.push(
+        Inst::new(
+            "selectif_spectre_guard",
+            r#"
+        Conditional select intended for Spectre guards.
+
+        This operation is semantically equivalent to a selectif instruction.
+        However, it is guaranteed to not be removed or otherwise altered by any
+        optimization pass, and is guaranteed to result in a conditional-move
+        instruction, not a branch-based lowering. As such, it is suitable
+        for use when producing Spectre guards.
For example, a bounds-check + may guard against unsafe speculation past a bounds-check conditional + branch by passing the address or index to be accessed through a + conditional move, also gated on the same condition. Because no + Spectre-vulnerable processors are known to perform speculation on + conditional move instructions, this is guaranteed to pick the + correct input. If the selected input in case of overflow is a "safe" + value, for example a null pointer that causes an exception in the + speculative path, this ensures that no Spectre vulnerability will + exist. + "#, + &formats.int_select, + ) + .operands_in(vec![cc, flags, x, y]) + .operands_out(vec![a]) + .other_side_effects(true), + ); + let c = &Operand::new("c", Any).with_doc("Controlling value to test"); ig.push( Inst::new( diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index 3d15387896..88db45d9ed 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -264,5 +264,23 @@ pub(crate) fn define() -> SettingGroup { true, ); + // Spectre options. + + settings.add_bool( + "enable_heap_access_spectre_mitigation", + r#" + Enable Spectre mitigation on heap bounds checks. + + This is a no-op for any heap that needs no bounds checks; e.g., + if the limit is static and the guard region is large enough that + the index cannot reach past it. + + This option is enabled by default because it is highly + recommended for secure sandboxing. The embedder should consider + the security implications carefully before disabling this option. + "#, + true, + ); + settings.build() } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index fa39cbed3b..bfc651f41c 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1023,7 +1023,7 @@ pub(crate) fn lower_insn_to_regs>( // Nothing. } - Opcode::Select | Opcode::Selectif => { + Opcode::Select | Opcode::Selectif | Opcode::SelectifSpectreGuard => { let cond = if op == Opcode::Select { let (cmp_op, narrow_mode) = if ty_bits(ctx.input_ty(insn, 0)) > 32 { (ALUOp::SubS64, NarrowValueMode::ZeroExtend64) diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index 663736793b..26a0640f08 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -66,19 +66,14 @@ fn dynamic_addr( // Start with the bounds check. Trap if `offset + access_size > bound`. let bound = pos.ins().global_value(offset_ty, bound_gv); - let oob; - if access_size == 1 { + let (cc, lhs, bound) = if access_size == 1 { // `offset > bound - 1` is the same as `offset >= bound`. - oob = pos - .ins() - .icmp(IntCC::UnsignedGreaterThanOrEqual, offset, bound); + (IntCC::UnsignedGreaterThanOrEqual, offset, bound) } else if access_size <= min_size { // We know that bound >= min_size, so here we can compare `offset > bound - access_size` // without wrapping. let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64)); - oob = pos - .ins() - .icmp(IntCC::UnsignedGreaterThan, offset, adj_bound); + (IntCC::UnsignedGreaterThan, offset, adj_bound) } else { // We need an overflow check for the adjusted offset. 
let access_size_val = pos.ins().iconst(offset_ty, access_size as i64); @@ -88,13 +83,27 @@ fn dynamic_addr( overflow, ir::TrapCode::HeapOutOfBounds, ); - oob = pos - .ins() - .icmp(IntCC::UnsignedGreaterThan, adj_offset, bound); - } + (IntCC::UnsignedGreaterThan, adj_offset, bound) + }; + let oob = pos.ins().icmp(cc, lhs, bound); pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); - compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func); + let spectre_oob_comparison = if isa.flags().enable_heap_access_spectre_mitigation() { + Some((cc, lhs, bound)) + } else { + None + }; + + compute_addr( + isa, + inst, + heap, + addr_ty, + offset, + offset_ty, + pos.func, + spectre_oob_comparison, + ); } /// Expand a `heap_addr` for a static heap. @@ -146,20 +155,35 @@ fn static_addr( // With that we have an optimization here where with 32-bit offsets and // `bound - access_size >= 4GB` we can omit a bounds check. let limit = bound - access_size; + let mut spectre_oob_comparison = None; if offset_ty != ir::types::I32 || limit < 0xffff_ffff { - let oob = if limit & 1 == 1 { + let (cc, lhs, limit_imm) = if limit & 1 == 1 { // Prefer testing `offset >= limit - 1` when limit is odd because an even number is // likely to be a convenient constant on ARM and other RISC architectures. - pos.ins() - .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, offset, limit as i64 - 1) + let limit = limit as i64 - 1; + (IntCC::UnsignedGreaterThanOrEqual, offset, limit) } else { - pos.ins() - .icmp_imm(IntCC::UnsignedGreaterThan, offset, limit as i64) + let limit = limit as i64; + (IntCC::UnsignedGreaterThan, offset, limit) }; + let oob = pos.ins().icmp_imm(cc, lhs, limit_imm); pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); + if isa.flags().enable_heap_access_spectre_mitigation() { + let limit = pos.ins().iconst(offset_ty, limit_imm); + spectre_oob_comparison = Some((cc, lhs, limit)); + } } - compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func); + compute_addr( + isa, + inst, + heap, + addr_ty, + offset, + offset_ty, + pos.func, + spectre_oob_comparison, + ); } /// Emit code for the base address computation of a `heap_addr` instruction. @@ -171,6 +195,11 @@ fn compute_addr( mut offset: ir::Value, offset_ty: ir::Type, func: &mut ir::Function, + // If we are performing Spectre mitigation with conditional selects, the + // values to compare and the condition code that indicates an out-of bounds + // condition; on this condition, the conditional move will choose a + // speculatively safe address (a zero / null pointer) instead. 
+ spectre_oob_comparison: Option<(IntCC, ir::Value, ir::Value)>, ) { let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); @@ -198,5 +227,15 @@ fn compute_addr( pos.ins().global_value(addr_ty, base_gv) }; - pos.func.dfg.replace(inst).iadd(base, offset); + if let Some((cc, a, b)) = spectre_oob_comparison { + let final_addr = pos.ins().iadd(base, offset); + let zero = pos.ins().iconst(addr_ty, 0); + let flags = pos.ins().ifcmp(a, b); + pos.func + .dfg + .replace(inst) + .selectif_spectre_guard(addr_ty, cc, flags, zero, final_addr); + } else { + pos.func.dfg.replace(inst).iadd(base, offset); + } } diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs index bd6d7bc360..4879d5c2a8 100644 --- a/cranelift/codegen/src/settings.rs +++ b/cranelift/codegen/src/settings.rs @@ -399,6 +399,7 @@ emit_all_ones_funcaddrs = false enable_probestack = true probestack_func_adjusts_sp = false enable_jump_tables = true +enable_heap_access_spectre_mitigation = true "# ); assert_eq!(f.opt_level(), super::OptLevel::None); diff --git a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif index 5fb080f8a6..d47a880320 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif @@ -1,4 +1,5 @@ test legalizer +set enable_heap_access_spectre_mitigation=false target x86_64 ; Test legalization for various forms of heap addresses. diff --git a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif index 78d1796d00..2c99d285b2 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif @@ -1,5 +1,6 @@ ; Test the legalization of memory objects. test legalizer +set enable_heap_access_spectre_mitigation=false target x86_64 ; regex: V=v\d+ diff --git a/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif index ab97cb5190..6ceba929e9 100644 --- a/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/vcode/aarch64/heap_addr.clif @@ -1,4 +1,5 @@ test compile +set enable_heap_access_spectre_mitigation=true target aarch64 function %dynamic_heap_check(i64 vmctx, i32) -> i64 { @@ -11,20 +12,23 @@ block0(v0: i64, v1: i32): return v2 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: ldur w2, [x0] -; nextln: add w2, w2, #0 -; nextln: subs wzr, w1, w2 -; nextln: b.ls label1 ; b label2 -; nextln: Block 1: -; check: add x0, x0, x1, UXTW -; nextln: mov sp, fp -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret -; nextln: Block 2: -; check: udf - +; check: Block 0: +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: ldur w2, [x0] +; nextln: add w2, w2, #0 +; nextln: subs wzr, w1, w2 +; nextln: b.ls label1 ; b label2 +; check: Block 1: +; check: add x0, x0, x1, UXTW +; nextln: subs wzr, w1, w2 +; nextln: movz x1, #0 +; nextln: csel x0, x1, x0, hi +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret +; check: Block 2: +; check: udf function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -35,15 +39,18 @@ block0(v0: i64, v1: i32): return v2 } -; check: stp fp, lr, [sp, #-16]! 
-; nextln: mov fp, sp -; nextln: subs wzr, w1, #65536 -; nextln: b.ls label1 ; b label2 -; nextln: Block 1: -; check: add x0, x0, x1, UXTW -; nextln: mov sp, fp -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret -; nextln: Block 2: -; check: udf - +; check: Block 0: +; check: stp fp, lr, [sp, #-16]! +; nextln: mov fp, sp +; nextln: subs wzr, w1, #65536 +; nextln: b.ls label1 ; b label2 +; check: Block 1: +; check: add x0, x0, x1, UXTW +; nextln: subs wzr, w1, #65536 +; nextln: movz x1, #0 +; nextln: csel x0, x1, x0, hi +; nextln: mov sp, fp +; nextln: ldp fp, lr, [sp], #16 +; nextln: ret +; check: Block 2: +; check: udf
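
A brief sketch of the guard's semantics, appended for illustration only. The function below is hypothetical and not part of this patch: the legalizer keeps the existing trapnz-protected bounds check and additionally routes the computed address through selectif_spectre_guard, so an out-of-bounds offset yields a null address even on a misspeculated in-bounds path.

fn guarded_heap_addr(base: u64, offset: u64, bound: u64, access_size: u64) -> u64 {
    // Bounds check, as in `dynamic_addr`: out of bounds when
    // `offset > bound - access_size` (or when `access_size` exceeds the bound).
    let oob = match bound.checked_sub(access_size) {
        Some(limit) => offset > limit,
        None => true,
    };
    // Address computation, as in `compute_addr`.
    let addr = base.wrapping_add(offset);
    // The Spectre guard. Written as an `if` here for readability; the emitted
    // code uses a conditional move (cmov on x86, csel on aarch64), so the
    // selection itself is not exposed to branch misprediction. On the
    // out-of-bounds condition the address collapses to 0, a null pointer.
    if oob { 0 } else { addr }
}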
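
For completeness, a sketch of how an embedder can flip the new setting through cranelift-codegen's settings builder, using the setting name defined in meta/src/shared/settings.rs above; `spectre_flags` is a hypothetical helper, not taken from this patch.

use cranelift_codegen::settings::{self, Configurable};

fn spectre_flags(enable: bool) -> settings::Flags {
    let mut builder = settings::builder();
    // Same setting name as added above; it defaults to "true" when not set.
    builder
        .set(
            "enable_heap_access_spectre_mitigation",
            if enable { "true" } else { "false" },
        )
        .expect("setting should exist");
    settings::Flags::new(builder)
}

The filetests above exercise the textual form of the same control: the x86 legalizer tests pin `set enable_heap_access_spectre_mitigation=false` so their expectations stay branch-only, while the aarch64 vcode test enables the flag and expects the csel-based guard sequence.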