diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 68f3c566a6..34e3145c5f 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -4049,16 +4049,10 @@ ;; wasm-table index) and then 64-bits (address addend). The small ;; lie about the I64 type is benign, since the temporary is dead ;; after this instruction (and its Cranelift type is thus unused). - (tmp2 WritableGpr (temp_writable_gpr)) + (tmp2 WritableGpr (temp_writable_gpr))) - (size OperandSize (raw_operand_size_of_type ty)) - - (jt_size u32 (jump_table_size jt_targets))) - - (with_flags_side_effect - (x64_cmp size (RegMemImm.Imm jt_size) idx) - (ConsumesFlags.ConsumesFlagsSideEffect - (MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets))))) + (SideEffectNoResult.Inst + (MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets)))) ;;;; iadd_pairwise constants ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 3674c6c295..e43746c423 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1596,42 +1596,15 @@ pub(crate) fn emit( // maximum range of 2 GB. If we later consider using shorter-range label references, // this will need to be revisited. - // Save index in a tmp (the live range of ridx only goes to start of this - // sequence; rtmp1 or rtmp2 may overwrite it). - - // We generate the following sequence: - // ;; generated by lowering: cmp #jmp_table_size, %idx - // jnb $default_target - // movl %idx, %tmp2 - // mov $0, %tmp1 - // cmovnb %tmp1, %tmp2 ;; Spectre mitigation. + // We generate the following sequence. Note that the only read of %idx is before the + // write to %tmp2, so regalloc may use the same register for both; fix x64/inst/mod.rs + // if you change this. // lea start_of_jump_table_offset(%rip), %tmp1 - // movslq [%tmp1, %tmp2, 4], %tmp2 ;; shift of 2, viz. multiply index by 4 + // movslq [%tmp1, %idx, 4], %tmp2 ;; shift of 2, viz. multiply index by 4 // addq %tmp2, %tmp1 // j *%tmp1 // $start_of_jump_table: // -- jump table entries - one_way_jmp(sink, CC::NB, *default_target); // idx unsigned >= jmp table size - - // Copy the index (and make sure to clear the high 32-bits lane of tmp2). - let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(idx), tmp2); - inst.emit(&[], sink, info, state); - - // Zero `tmp1` to overwrite `tmp2` with zeroes on the - // out-of-bounds case (Spectre mitigation using CMOV). - // Note that we need to do this with a move-immediate - // form, because we cannot clobber the flags. - let inst = Inst::imm(OperandSize::Size32, 0, tmp1); - inst.emit(&[], sink, info, state); - - // Spectre mitigation: CMOV to zero the index if the out-of-bounds branch above misspeculated. - let inst = Inst::cmove( - OperandSize::Size64, - CC::NB, - RegMem::reg(tmp1.to_reg()), - tmp2, - ); - inst.emit(&[], sink, info, state); // Load base address of jump table. let start_of_jumptable = sink.get_label(); @@ -1645,7 +1618,7 @@ pub(crate) fn emit( RegMem::mem(Amode::imm_reg_reg_shift( 0, Gpr::new(tmp1.to_reg()).unwrap(), - Gpr::new(tmp2.to_reg()).unwrap(), + Gpr::new(idx).unwrap(), 2, )), tmp2, @@ -1668,7 +1641,7 @@ pub(crate) fn emit( // Emit jump table (table of 32-bit offsets). sink.bind_label(start_of_jumptable); let jt_off = sink.cur_offset(); - for &target in targets.iter() { + for &target in targets.iter().chain(std::iter::once(default_target)) { let word_off = sink.cur_offset(); // off_into_table is an addend here embedded in the label to be later patched at // the end of codegen. The offset is initially relative to this jump table entry; diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 4e15b2aea4..b0002c59f5 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -2216,7 +2216,10 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol } => { collector.reg_use(*idx); collector.reg_early_def(*tmp1); - collector.reg_early_def(*tmp2); + // In the sequence emitted for this pseudoinstruction in emit.rs, + // tmp2 is only written after idx is read, so it doesn't need to be + // an early def. + collector.reg_def(*tmp2); } Inst::JmpUnknown { target } => { diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 9945f08698..ff14669e73 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2923,7 +2923,14 @@ ;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower_branch (br_table idx @ (value_type ty) _) (jump_table_targets default_target jt_targets)) - (emit_side_effect (jmp_table_seq ty idx default_target jt_targets))) + (let ((size OperandSize (raw_operand_size_of_type ty)) + (jt_size u32 (jump_table_size jt_targets)) + (size_reg Reg (imm ty (u32_as_u64 jt_size))) + (idx_reg Gpr (extend_to_gpr idx $I64 (ExtendKind.Zero))) + (clamped_idx Reg (with_flags_reg + (x64_cmp size size_reg idx_reg) + (cmove ty (CC.B) idx_reg size_reg)))) + (emit_side_effect (jmp_table_seq ty clamped_idx default_target jt_targets)))) ;; Rules for `select_spectre_guard` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index e22f9cc560..01d9c3abdc 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -314,8 +314,11 @@ block2: ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cmpl $2, %edi -; br_table %rdi, %r8, %r9 +; movl $2, %r9d +; movl %edi, %r10d +; cmpl %r9d, %r10d +; cmovbl %r10d, %r9d, %r9d +; br_table %r9, %rax, %rcx ; block1: ; jmp label4 ; block2: @@ -336,25 +339,28 @@ block2: ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; cmpl $2, %edi -; jae 0x40 -; movl %edi, %r9d -; movl $0, %r8d -; cmovaeq %r8, %r9 -; leaq 0xb(%rip), %r8 -; movslq (%r8, %r9, 4), %r9 -; addq %r9, %r8 -; jmpq *%r8 -; adcb $0, %al +; movl $2, %r9d +; movl %edi, %r10d +; cmpl %r9d, %r10d +; cmovbl %r10d, %r9d +; leaq 0xa(%rip), %rax +; movslq (%rax, %r9, 4), %rcx +; addq %rcx, %rax +; jmpq *%rax +; sbbb %al, (%rax) ; addb %al, (%rax) -; block2: ; offset 0x34 -; jmp 0x40 -; block3: ; offset 0x39 +; adcl %eax, (%rax) +; addb %al, (%rax) +; sbbb %al, (%rax) +; addb %al, (%rax) +; block2: ; offset 0x31 +; jmp 0x3d +; block3: ; offset 0x36 ; xorl %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; retq -; block4: ; offset 0x40 +; block4: ; offset 0x3d ; movl $1, %eax ; movq %rbp, %rsp ; popq %rbp @@ -756,27 +762,30 @@ block5(v5: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cmpl $4, %edi -; br_table %rdi, %rsi, %rax +; movl $4, %eax +; movl %edi, %ecx +; cmpl %eax, %ecx +; cmovbl %ecx, %eax, %eax +; br_table %rax, %r9, %r10 ; block1: ; jmp label4 ; block2: ; jmp label4 ; block3: -; movl $3, %r9d +; movl $3, %esi ; jmp label7 ; block4: -; movl $2, %r9d +; movl $2, %esi ; jmp label7 ; block5: -; movl $1, %r9d +; movl $1, %esi ; jmp label7 ; block6: -; movl $4, %r9d +; movl $4, %esi ; jmp label7 ; block7: ; movq %rdi, %rax -; addl %eax, %r9d, %eax +; addl %eax, %esi, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -786,37 +795,38 @@ block5(v5: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; cmpl $4, %edi -; jae 0x5f -; movl %edi, %eax -; movl $0, %esi -; cmovaeq %rsi, %rax -; leaq 0xa(%rip), %rsi -; movslq (%rsi, %rax, 4), %rax -; addq %rax, %rsi -; jmpq *%rsi -; subl (%rax), %eax +; movl $4, %eax +; movl %edi, %ecx +; cmpl %eax, %ecx +; cmovbl %ecx, %eax +; leaq 0xb(%rip), %r9 +; movslq (%r9, %rax, 4), %r10 +; addq %r10, %r9 +; jmpq *%r9 +; subl $0x23000000, %eax ; addb %al, (%rax) -; andb %al, (%rax) +; addb %ah, (%rbx) ; addb %al, (%rax) -; andb %al, (%rax) +; addb %bl, (%rcx) ; addb %al, (%rax) -; block2: ; offset 0x39 -; jmp 0x49 -; block3: ; offset 0x3e -; movl $3, %r9d -; jmp 0x65 -; block4: ; offset 0x49 -; movl $2, %r9d -; jmp 0x65 -; block5: ; offset 0x54 -; movl $1, %r9d -; jmp 0x65 -; block6: ; offset 0x5f -; movl $4, %r9d -; block7: ; offset 0x65 +; addb %dh, (%rdi) +; addb %al, (%rax) +; block2: ; offset 0x36 +; jmp 0x45 +; block3: ; offset 0x3b +; movl $3, %esi +; jmp 0x5e +; block4: ; offset 0x45 +; movl $2, %esi +; jmp 0x5e +; block5: ; offset 0x4f +; movl $1, %esi +; jmp 0x5e +; block6: ; offset 0x59 +; movl $4, %esi +; block7: ; offset 0x5e ; movq %rdi, %rax -; addl %r9d, %eax +; addl %esi, %eax ; movq %rbp, %rsp ; popq %rbp ; retq @@ -837,25 +847,28 @@ block1(v5: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $1, %edx -; movl $2, %r8d -; movl $3, %r9d +; movl $1, %r10d +; movl $2, %r11d +; movl $3, %esi ; movl $4, %eax -; cmpl $4, %edi -; br_table %rdi, %r11, %r10 +; movl $4, %r8d +; movl %edi, %r9d +; cmpl %r8d, %r9d +; cmovbl %r9d, %r8d, %r8d +; br_table %r8, %rdi, %rcx ; block1: ; jmp label6 ; block2: -; movq %rdx, %rax +; movq %r10, %rax ; jmp label6 ; block3: -; movq %r8, %rax +; movq %r11, %rax ; jmp label6 ; block4: -; movq %r8, %rax +; movq %r11, %rax ; jmp label6 ; block5: -; movq %r9, %rax +; movq %rsi, %rax ; jmp label6 ; block6: ; movq %rbp, %rsp @@ -867,37 +880,42 @@ block1(v5: i32): ; pushq %rbp ; movq %rsp, %rbp ; block1: ; offset 0x4 -; movl $1, %edx -; movl $2, %r8d -; movl $3, %r9d +; movl $1, %r10d +; movl $2, %r11d +; movl $3, %esi ; movl $4, %eax -; cmpl $4, %edi -; jae 0x72 -; movl %edi, %r10d -; movl $0, %r11d -; cmovaeq %r11, %r10 -; leaq 0xb(%rip), %r11 -; movslq (%r11, %r10, 4), %r10 -; addq %r10, %r11 -; jmpq *%r11 -; adcl $0x1d000000, %eax +; movl $4, %r8d +; movl %edi, %r9d +; cmpl %r8d, %r9d +; cmovbl %r9d, %r8d +; leaq 0xa(%rip), %rdi +; movslq (%rdi, %r8, 4), %rcx +; addq %rcx, %rdi +; jmpq *%rdi +; sbbl %eax, (%rax) ; addb %al, (%rax) -; addb %ah, 0x2d000000(%rip) +; andl %eax, (%rax) ; addb %al, (%rax) -; block2: ; offset 0x52 -; jmp 0x72 -; block3: ; offset 0x57 -; movq %rdx, %rax -; jmp 0x72 -; block4: ; offset 0x5f -; movq %r8, %rax -; jmp 0x72 -; block5: ; offset 0x67 -; movq %r8, %rax -; jmp 0x72 -; block6: ; offset 0x6f -; movq %r9, %rax -; block7: ; offset 0x72 +; subl %eax, (%rax) +; addb %al, (%rax) +; xorl %eax, (%rax) +; addb %al, (%rax) +; xorb $0, %al +; addb %al, (%rax) +; block2: ; offset 0x4f +; jmp 0x6f +; block3: ; offset 0x54 +; movq %r10, %rax +; jmp 0x6f +; block4: ; offset 0x5c +; movq %r11, %rax +; jmp 0x6f +; block5: ; offset 0x64 +; movq %r11, %rax +; jmp 0x6f +; block6: ; offset 0x6c +; movq %rsi, %rax +; block7: ; offset 0x6f ; movq %rbp, %rsp ; popq %rbp ; retq