diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index c621253e1a..e0818df11e 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -595,6 +595,17 @@ impl DataFlowGraph { DisplayInst(self, inst) } + /// Returns an object that displays the given `value`'s defining instruction. + /// + /// Panics if the value is not defined by an instruction (i.e. it is a basic + /// block argument). + pub fn display_value_inst(&self, value: Value) -> DisplayInst<'_> { + match self.value_def(value) { + ir::ValueDef::Result(inst, _) => self.display_inst(inst), + ir::ValueDef::Param(_, _) => panic!("value is not defined by an instruction"), + } + } + /// Get all value arguments on `inst` as a slice. pub fn inst_args(&self, inst: Inst) -> &[Value] { self.insts[inst].arguments(&self.value_lists) diff --git a/cranelift/codegen/src/legalizer/globalvalue.rs b/cranelift/codegen/src/legalizer/globalvalue.rs index 751f4f4035..57fa29e1f5 100644 --- a/cranelift/codegen/src/legalizer/globalvalue.rs +++ b/cranelift/codegen/src/legalizer/globalvalue.rs @@ -14,6 +14,12 @@ pub fn expand_global_value( isa: &dyn TargetIsa, global_value: ir::GlobalValue, ) { + crate::trace!( + "expanding global value: {:?}: {}", + inst, + func.dfg.display_inst(inst) + ); + match func.global_values[global_value] { ir::GlobalValueData::VMContext => vmctx_addr(inst, func), ir::GlobalValueData::IAddImm { diff --git a/cranelift/codegen/src/legalizer/heap.rs b/cranelift/codegen/src/legalizer/heap.rs index d51a6244eb..96e905b3f1 100644 --- a/cranelift/codegen/src/legalizer/heap.rs +++ b/cranelift/codegen/src/legalizer/heap.rs @@ -9,6 +9,7 @@ use crate::ir::condcodes::IntCC; use crate::ir::immediates::{Uimm32, Uimm8}; use crate::ir::{self, InstBuilder, RelSourceLoc}; use crate::isa::TargetIsa; +use crate::trace; /// Expand a `heap_addr` instruction according to the definition of the heap. 
pub fn expand_heap_addr( @@ -21,6 +22,12 @@ pub fn expand_heap_addr( offset_immediate: Uimm32, access_size: Uimm8, ) { + trace!( + "expanding heap_addr: {:?}: {}", + inst, + func.dfg.display_inst(inst) + ); + match func.heaps[heap].style { ir::HeapStyle::Dynamic { bound_gv } => dynamic_addr( isa, @@ -76,6 +83,10 @@ fn dynamic_addr( let adj_bound = pos .ins() .iadd_imm(bound, -(offset_plus_size(offset, access_size) as i64)); + trace!( + " inserting: {}", + pos.func.dfg.display_value_inst(adj_bound) + ); (IntCC::UnsignedGreaterThan, index, adj_bound) } else { // We need an overflow check for the adjusted offset. @@ -85,14 +96,30 @@ fn dynamic_addr( let adj_offset = pos.ins() .uadd_overflow_trap(index, access_size_val, ir::TrapCode::HeapOutOfBounds); + trace!( + " inserting: {}", + pos.func.dfg.display_value_inst(adj_offset) + ); (IntCC::UnsignedGreaterThan, adj_offset, bound) }; - let oob = pos.ins().icmp(cc, lhs, bound); - pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); let spectre_oob_comparison = if isa.flags().enable_heap_access_spectre_mitigation() { - Some((cc, lhs, bound)) + // When we emit a spectre-guarded heap access, we do a `select + // is_out_of_bounds, NULL, addr` to compute the address, and so the load + // will trap if the address is out of bounds, which means we don't need + // to do another explicit bounds check like we do below. + Some(SpectreOobComparison { + cc, + lhs, + rhs: bound, + }) } else { + let oob = pos.ins().icmp(cc, lhs, bound); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(oob)); + + let trapnz = pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); + trace!(" inserting: {}", pos.func.dfg.display_inst(trapnz)); + None }; @@ -130,8 +157,10 @@ fn static_addr( // This first case is a trivial case where we can statically trap. if offset_plus_size(offset, access_size) > bound { // This will simply always trap since `offset >= 0`. 
- pos.ins().trap(ir::TrapCode::HeapOutOfBounds); - pos.func.dfg.replace(inst).iconst(addr_ty, 0); + let trap = pos.ins().trap(ir::TrapCode::HeapOutOfBounds); + trace!(" inserting: {}", pos.func.dfg.display_inst(trap)); + let iconst = pos.func.dfg.replace(inst).iconst(addr_ty, 0); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(iconst)); // Split the block, as the trap is a terminator instruction. let curr_block = pos.current_block().expect("Cursor is not in a block"); @@ -180,10 +209,19 @@ fn static_addr( (IntCC::UnsignedGreaterThan, index, limit) }; let oob = pos.ins().icmp_imm(cc, lhs, limit_imm); - pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(oob)); + + let trapnz = pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); + trace!(" inserting: {}", pos.func.dfg.display_inst(trapnz)); + if isa.flags().enable_heap_access_spectre_mitigation() { let limit = pos.ins().iconst(addr_ty, limit_imm); - spectre_oob_comparison = Some((cc, lhs, limit)); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(limit)); + spectre_oob_comparison = Some(SpectreOobComparison { + cc, + lhs, + rhs: limit, + }); } } @@ -229,6 +267,12 @@ fn cast_index_to_pointer_ty( extended_index } +struct SpectreOobComparison { + cc: IntCC, + lhs: ir::Value, + rhs: ir::Value, +} + /// Emit code for the base address computation of a `heap_addr` instruction. fn compute_addr( isa: &dyn TargetIsa, @@ -242,7 +286,7 @@ fn compute_addr( // values to compare and the condition code that indicates an out-of bounds // condition; on this condition, the conditional move will choose a // speculatively safe address (a zero / null pointer) instead. 
- spectre_oob_comparison: Option<(IntCC, ir::Value, ir::Value)>, + spectre_oob_comparison: Option<SpectreOobComparison>, ) { debug_assert_eq!(func.dfg.value_type(index), addr_ty); let mut pos = FuncCursor::new(func).at_inst(inst); @@ -250,13 +294,17 @@ // Add the heap base address base let base = if isa.flags().enable_pinned_reg() && isa.flags().use_pinned_reg_as_heap_base() { - pos.ins().get_pinned_reg(isa.pointer_type()) + let base = pos.ins().get_pinned_reg(isa.pointer_type()); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(base)); + base } else { let base_gv = pos.func.heaps[heap].base; - pos.ins().global_value(addr_ty, base_gv) + let base = pos.ins().global_value(addr_ty, base_gv); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(base)); + base }; - if let Some((cc, a, b)) = spectre_oob_comparison { + if let Some(SpectreOobComparison { cc, lhs, rhs }) = spectre_oob_comparison { let final_base = pos.ins().iadd(base, index); // NB: The addition of the offset immediate must happen *before* the // `select_spectre_guard`. 
If it happens after, then we potentially are @@ -264,22 +312,40 @@ fn compute_addr( let final_addr = if offset == 0 { final_base } else { - pos.ins().iadd_imm(final_base, offset as i64) + let final_addr = pos.ins().iadd_imm(final_base, offset as i64); + trace!( + " inserting: {}", + pos.func.dfg.display_value_inst(final_addr) + ); + final_addr }; let zero = pos.ins().iconst(addr_ty, 0); - let cmp = pos.ins().icmp(cc, a, b); - pos.func + trace!(" inserting: {}", pos.func.dfg.display_value_inst(zero)); + + let cmp = pos.ins().icmp(cc, lhs, rhs); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(cmp)); + + let value = pos + .func .dfg .replace(inst) .select_spectre_guard(cmp, zero, final_addr); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(value)); } else if offset == 0 { - pos.func.dfg.replace(inst).iadd(base, index); + let addr = pos.func.dfg.replace(inst).iadd(base, index); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr)); } else { let final_base = pos.ins().iadd(base, index); - pos.func + trace!( + " inserting: {}", + pos.func.dfg.display_value_inst(final_base) + ); + let addr = pos + .func .dfg .replace(inst) .iadd_imm(final_base, offset as i64); + trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr)); } } diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 96eccb2079..0fa8156836 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -19,6 +19,7 @@ use crate::ir::immediates::Imm64; use crate::ir::types::{I128, I64}; use crate::ir::{self, InstBuilder, InstructionData, MemFlags, Value}; use crate::isa::TargetIsa; +use crate::trace; mod globalvalue; mod heap; @@ -46,6 +47,8 @@ fn imm_const(pos: &mut FuncCursor, arg: Value, imm: Imm64, is_signed: bool) -> V /// Perform a simple legalization by expansion of the function, without /// platform-specific transforms. 
pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) { + trace!("Pre-legalization function:\n{}", func.display()); + let mut pos = FuncCursor::new(func); let func_begin = pos.position(); pos.set_position(func_begin); @@ -246,6 +249,8 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: pos.set_position(prev_pos); } } + + trace!("Post-legalization function:\n{}", func.display()); } /// Custom expansion for conditional trap instructions. @@ -257,6 +262,12 @@ fn expand_cond_trap( arg: ir::Value, code: ir::TrapCode, ) { + trace!( + "expanding conditional trap: {:?}: {}", + inst, + func.dfg.display_inst(inst) + ); + // Parse the instruction. let trapz = match opcode { ir::Opcode::Trapz => true, diff --git a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif index 2f893bcd71..b3b1b8ce33 100644 --- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif @@ -17,17 +17,12 @@ block0(v0: i64, v1: i32): ; mov w8, w1 ; ldr x9, [x0] ; mov x9, x9 -; subs xzr, x8, x9 -; b.ls label1 ; b label2 -; block1: ; add x10, x0, x1, UXTW -; movz x11, #0 +; movz x7, #0 ; subs xzr, x8, x9 -; csel x0, x11, x10, hi +; csel x0, x7, x10, hi ; csdb ; ret -; block2: -; udf #0xc11f function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -69,18 +64,13 @@ block0(v0: i64, v1: i32): ; movz x9, #24 ; adds x11, x11, x9 ; b.lo 8 ; udf +; add x12, x0, x1, UXTW +; add x12, x12, #16 +; movz x13, #0 ; subs xzr, x11, x10 -; b.ls label1 ; b label2 -; block1: -; add x13, x0, x1, UXTW -; add x13, x13, #16 -; movz x12, #0 -; subs xzr, x11, x10 -; csel x0, x12, x13, hi +; csel x0, x13, x12, hi ; csdb ; ret -; block2: -; udf #0xc11f function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif 
b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif index d90da8f22c..86da6b79a3 100644 --- a/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif +++ b/cranelift/filetests/filetests/isa/riscv64/heap-addr.clif @@ -13,19 +13,14 @@ block0(v0: i64, v1: i32): } ; block0: -; uext.w a7,a1 -; ld t3,0(a0) -; addi t3,t3,0 -; ule t4,a7,t3##ty=i64 -; bne t4,zero,taken(label1),not_taken(label2) -; block1: -; add t4,a0,a7 -; ugt a7,a7,t3##ty=i64 -; li t0,0 -; selectif_spectre_guard a0,t0,t4##test=a7 +; uext.w a6,a1 +; ld a7,0(a0) +; addi t3,a7,0 +; add a7,a0,a6 +; ugt a5,a6,t3##ty=i64 +; li t3,0 +; selectif_spectre_guard a0,t3,a7##test=a5 ; ret -; block2: -; udf##trap_code=heap_oob function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -62,23 +57,18 @@ block0(v0: i64, v1: i32): } ; block0: -; uext.w t1,a1 -; ld t0,0(a0) -; li t3,24 -; add t2,t1,t3 -; ult a1,t2,t1##ty=i64 -; trap_if a1,heap_oob -; ule a1,t2,t0##ty=i64 -; bne a1,zero,taken(label1),not_taken(label2) -; block1: -; add a0,a0,t1 -; addi a0,a0,16 -; ugt t1,t2,t0##ty=i64 -; li a1,0 -; selectif_spectre_guard a0,a1,a0##test=t1 +; uext.w t0,a1 +; ld t4,0(a0) +; li a7,24 +; add t1,t0,a7 +; ult t2,t1,t0##ty=i64 +; trap_if t2,heap_oob +; add t0,a0,t0 +; addi t0,t0,16 +; ugt t4,t1,t4##ty=i64 +; li t1,0 +; selectif_spectre_guard a0,t1,t0##test=t4 ; ret -; block2: -; udf##trap_code=heap_oob function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif index 4dc22f499f..205564e0fd 100644 --- a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif @@ -12,20 +12,14 @@ block0(v0: i64, v1: i32): } ; block0: -; llgfr %r5, %r3 -; lgr %r4, %r2 -; lg %r2, 0(%r4) -; aghik %r3, %r2, 0 -; clgr %r5, %r3 -; jgnh label1 ; jg label2 -; block1: -; agrk %r2, %r4, %r5 -; lghi %r4, 0 -; clgr %r5, %r3 -; locgrh %r2, %r4 +; 
llgfr %r4, %r3 +; lghi %r3, 0 +; ag %r3, 0(%r2) +; agr %r2, %r4 +; lghi %r5, 0 +; clgr %r4, %r3 +; locgrh %r2, %r5 ; br %r14 -; block2: -; trap function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -66,18 +60,13 @@ block0(v0: i64, v1: i32): ; lghi %r5, 24 ; algfr %r5, %r3 ; jle 6 ; trap -; clgr %r5, %r4 -; jgnh label1 ; jg label2 -; block1: -; agrk %r3, %r2, %r7 -; aghik %r2, %r3, 16 +; agr %r2, %r7 +; aghi %r2, 16 ; lghi %r3, 0 ; clgr %r5, %r4 ; locgrh %r2, %r3 ; lmg %r7, %r15, 56(%r15) ; br %r14 -; block2: -; trap function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index 87444682ac..f0e472c482 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -33,23 +33,18 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movl %edi, %eax -; movq 8(%rsi), %rdx -; movq %rax, %rdi -; addq %rdi, $32768, %rdi +; movq 8(%rsi), %rdi +; movq %rax, %rcx +; addq %rcx, $32768, %rcx ; jnb ; ud2 heap_oob ; -; cmpq %rdx, %rdi -; jbe label1; j label2 -; block1: ; addq %rax, 0(%rsi), %rax ; addq %rax, $32768, %rax -; xorq %rcx, %rcx, %rcx -; cmpq %rdx, %rdi -; cmovnbeq %rcx, %rax, %rax +; xorq %rsi, %rsi, %rsi +; cmpq %rdi, %rcx +; cmovnbeq %rsi, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret -; block2: -; ud2 heap_oob ;; The heap address calculation for this statically-allocated memory checks that ;; the passed offset (%r11) is within bounds (`cmp + jbe + j`) and then includes @@ -120,25 +115,18 @@ block0(v0: i64, v1: i32): ; block0: ; movq %rdi, %rax ; movl %esi, %edi -; movq %rax, %rcx -; movq 0(%rcx), %rsi -; movq %rdi, %rdx -; addq %rdx, $24, %rdx +; movq 0(%rax), %rsi +; movq %rdi, %rcx +; addq %rcx, $24, %rcx ; jnb ; ud2 heap_oob ; -; cmpq %rsi, %rdx -; jbe label1; j label2 -; block1: -; movq %rcx, %rax ; addq %rax, %rdi, %rax ; addq %rax, $16, %rax -; xorq %rcx, 
%rcx, %rcx -; cmpq %rsi, %rdx -; cmovnbeq %rcx, %rax, %rax +; xorq %rdi, %rdi, %rdi +; cmpq %rsi, %rcx +; cmovnbeq %rdi, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret -; block2: -; ud2 heap_oob function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { gv0 = vmctx diff --git a/cranelift/filetests/filetests/legalizer/bounds-checks.clif b/cranelift/filetests/filetests/legalizer/bounds-checks.clif new file mode 100644 index 0000000000..6368ea8b30 --- /dev/null +++ b/cranelift/filetests/filetests/legalizer/bounds-checks.clif @@ -0,0 +1,32 @@ +test legalizer +set enable_heap_access_spectre_mitigation=true +target aarch64 +target x86_64 + +;; Test that when both (1) dynamic memories and (2) heap access spectre +;; mitigations are enabled, we deduplicate the bounds check between the two. + +function %wasm_load(i64 vmctx, i32) -> i32 wasmtime_system_v { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+88 + gv2 = load.i64 notrap aligned gv0+80 + heap0 = dynamic gv2, min 0, bound gv1, offset_guard 0x8000_0000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0, 4 + v3 = load.i32 little heap v2 + return v3 +} + +; check: block0(v0: i64, v1: i32): +; nextln: v4 = uextend.i64 v1 +; nextln: v5 = load.i64 notrap aligned v0+88 +; nextln: v6 = iconst.i64 4 +; nextln: v7 = uadd_overflow_trap v4, v6, heap_oob ; v6 = 4 +; nextln: v8 = load.i64 notrap aligned v0+80 +; nextln: v9 = iadd v8, v4 +; nextln: v10 = iconst.i64 0 +; nextln: v11 = icmp ugt v7, v5 +; nextln: v2 = select_spectre_guard v11, v10, v9 ; v10 = 0 +; nextln: v3 = load.i32 little heap v2 +; nextln: return v3