diff options
author | John Levon <john.levon@joyent.com> | 2018-07-30 10:05:48 +0100 |
---|---|---|
committer | Richard Lowe <richlowe@richlowe.net> | 2018-08-07 19:46:08 +0000 |
commit | eea802b0a2c12269d15276d4657e5cd64dd541a4 (patch) | |
tree | 428299fc23a7fc774b25363db24f80dfc085937c | |
parent | 72dc11568f48cd37cf8182a82d9cb55b22d7c805 (diff) | |
download | illumos-joyent-eea802b0a2c12269d15276d4657e5cd64dd541a4.tar.gz |
9685 KPTI %cr3 handling needs fixes
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Approved by: Richard Lowe <richlowe@richlowe.net>
-rw-r--r-- | usr/src/uts/i86pc/ml/kpti_trampolines.s | 7 | ||||
-rw-r--r-- | usr/src/uts/i86pc/ml/locore.s | 23 | ||||
-rw-r--r-- | usr/src/uts/intel/kdi/kdi_asm.s | 64 | ||||
-rw-r--r-- | usr/src/uts/intel/kdi/kdi_idt.c | 24 |
4 files changed, 78 insertions, 40 deletions
diff --git a/usr/src/uts/i86pc/ml/kpti_trampolines.s b/usr/src/uts/i86pc/ml/kpti_trampolines.s index 036c254d08..83cbd69048 100644 --- a/usr/src/uts/i86pc/ml/kpti_trampolines.s +++ b/usr/src/uts/i86pc/ml/kpti_trampolines.s @@ -251,6 +251,11 @@ kpti_kbase: * This is used for all interrupts that can plausibly be taken inside another * interrupt and are using a kpti_frame stack (so #BP, #DB, #GP, #PF, #SS). * + * We also use this for #NP, even though it uses the standard IST: the + * additional %rsp checks below will catch when we get an exception doing an + * iret to userspace with a bad %cs/%ss. This appears as a kernel trap, and + * only later gets redirected via kern_gpfault(). + * * We check for whether we took the interrupt while in another trampoline, in * which case we need to use the kthread stack. */ @@ -649,7 +654,7 @@ tr_intr_ret_end: MK_INTR_TRAMPOLINE_NOERR(invoptrap) MK_INTR_TRAMPOLINE_NOERR(ndptrap) MK_INTR_TRAMPOLINE(invtsstrap) - MK_INTR_TRAMPOLINE(segnptrap) + MK_DBG_INTR_TRAMPOLINE(segnptrap) MK_DBG_INTR_TRAMPOLINE(stktrap) MK_DBG_INTR_TRAMPOLINE(gptrap) MK_DBG_INTR_TRAMPOLINE(pftrap) diff --git a/usr/src/uts/i86pc/ml/locore.s b/usr/src/uts/i86pc/ml/locore.s index acd96e271a..10db95ab51 100644 --- a/usr/src/uts/i86pc/ml/locore.s +++ b/usr/src/uts/i86pc/ml/locore.s @@ -159,7 +159,7 @@ _locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop) * %rdi = boot services (should die someday) * %rdx = bootops * end - */ + */ leaq edata(%rip), %rbp /* reference edata for ksyms */ movq $0, (%rbp) /* limit stack back trace */ @@ -178,7 +178,7 @@ _locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop) #endif /* * Save call back for special x86 boot services vector - */ + */ movq %rdi, sysp(%rip) movq %rdx, bootops(%rip) /* save bootops */ @@ -208,7 +208,7 @@ _locore_start(struct boot_syscalls *sysp, ulong_t rsi, struct bootops *bop) #endif /* __xpv */ /* - * (We just assert this works by virtue of being here) + * (We just assert this works by virtue of being here) */ bts $X86FSET_CPUID, x86_featureset(%rip) @@ -268,7 +268,7 @@ _locore_start(struct boot_syscalls *sysp, struct bootops *bop) /* * %ecx = boot services (should die someday) * %ebx = bootops - */ + */ mov $edata, %ebp / edata needs to be defined for ksyms movl $0, (%ebp) / limit stack back trace @@ -283,14 +283,14 @@ _locore_start(struct boot_syscalls *sysp, struct bootops *bop) */ mov %ecx, sysp / save call back for boot services - mov %ebx, bootops / save bootops + mov %ebx, bootops / save bootops movl $bootops, bootopsp /* * Save all registers and flags */ - pushal + pushal pushfl #if !defined(__xpv) @@ -443,7 +443,7 @@ port_22_free: * cycle. If the CCR index was not valid for this Cyrix model, we may * have performed an external I/O cycle as well. In these cases and * if the motherboard/chipset vendor ignores I/O address line A1, - * then the PIC will have IRQ3 set at the lowest priority as a side + * then the PIC will have IRQ3 set at the lowest priority as a side * effect of the above outb. We are reasonalbly confident that there * is not an unknown device on I/O port 0x22, so there should have been * no unpredictable side-effect of the above outb. @@ -892,7 +892,7 @@ likelyM3: * now we will call anything with a DIR0 of 0x80 or higher an MIII. * The MIII is supposed to support large pages, but we will believe * it when we see it. For now we just enable and test for MII features. - */ + */ movl $X86_TYPE_VIA_CYRIX_III, x86_type jmp likeMII @@ -930,7 +930,7 @@ coma_bug: * fixed this bug sometime late in 1997 and no other exploits other than * xchgl have been discovered is good indication that this workaround is * reasonable. - */ + */ .set CYRIX_DBR0, 0x30 / Debug Register 0 .set CYRIX_DBR1, 0x31 / Debug Register 1 @@ -939,7 +939,7 @@ coma_bug: .set CYRIX_DOR, 0x3c / Debug Opcode Register /* - * What is known about DBR1, DBR2, DBR3, and DOR is that for normal + * What is known about DBR1, DBR2, DBR3, and DOR is that for normal * cpu execution DBR1, DBR2, and DBR3 are set to 0. To obtain opcode * serialization, DBR1, DBR2, and DBR3 are loaded with 0xb8, 0x7f, * and 0xff. Then, DOR is loaded with the one byte opcode. @@ -999,7 +999,7 @@ coma_bug: /* * write DBR1 */ - movb $CYRIX_DBR1, %al + movb $CYRIX_DBR1, %al outb $CYRIX_CRI movb $0xf8, %al outb $CYRIX_CRD @@ -1201,6 +1201,7 @@ cmntrap() leaq dtrace_badtrap(%rip), %rdi xorl %eax, %eax call panic + SET_SIZE(cmntrap_pushed) SET_SIZE(cmntrap) SET_SIZE(_cmntrap) diff --git a/usr/src/uts/intel/kdi/kdi_asm.s b/usr/src/uts/intel/kdi/kdi_asm.s index 5180dbb1b2..edfbea10c7 100644 --- a/usr/src/uts/intel/kdi/kdi_asm.s +++ b/usr/src/uts/intel/kdi/kdi_asm.s @@ -452,8 +452,8 @@ pushq %rdi call kdi_trap_pass - cmpq $1, %rax - je kdi_pass_to_kernel + testq %rax, %rax + jnz kdi_pass_to_kernel popq %rax /* cpusave in %rax */ SAVE_IDTGDT @@ -569,17 +569,9 @@ /*NOTREACHED*/ SET_SIZE(kdi_resume) - ENTRY_NP(kdi_pass_to_kernel) - - popq %rdi /* cpusave */ - - movq $KDI_CPU_STATE_NONE, KRS_CPU_STATE(%rdi) /* - * Find the trap and vector off the right kernel handler. The trap - * handler will expect the stack to be in trap order, with %rip being - * the last entry, so we'll need to restore all our regs. On i86xpv - * we'll need to compensate for XPV_TRAP_POP. + * We took a trap that should be handled by the kernel, not KMDB. * * We're hard-coding the three cases where KMDB has installed permanent * handlers, since after we KDI_RESTORE_REGS(), we don't have registers @@ -587,15 +579,53 @@ * through here at the same time. * * Note that we handle T_DBGENTR since userspace might have tried it. + * + * The trap handler will expect the stack to be in trap order, with %rip + * being the last entry, so we'll need to restore all our regs. On + * i86xpv we'll need to compensate for XPV_TRAP_POP. + * + * %rax on entry is either 1 or 2, which is from kdi_trap_pass(). + * kdi_cmnint stashed the original %cr3 into KDIREG_CR3, then (probably) + * switched us to the CPU's kf_kernel_cr3. But we're about to call, for + * example: + * + * dbgtrap->trap()->tr_iret_kernel + * + * which, unlike, tr_iret_kdi, doesn't restore the original %cr3, so + * we'll do so here if needed. + * + * This isn't just a matter of tidiness: for example, consider: + * + * hat_switch(oldhat=kas.a_hat, newhat=prochat) + * setcr3() + * reset_kpti() + * *brktrap* due to fbt on reset_kpti:entry + * + * Here, we have the new hat's %cr3, but we haven't yet updated + * kf_kernel_cr3 (so its currently kas's). So if we don't restore here, + * we'll stay on kas's cr3 value on returning from the trap: not good if + * we fault on a userspace address. */ + ENTRY_NP(kdi_pass_to_kernel) + + popq %rdi /* cpusave */ + movq $KDI_CPU_STATE_NONE, KRS_CPU_STATE(%rdi) movq KRS_GREGS(%rdi), %rsp + + cmpq $2, %rax + jne no_restore_cr3 + movq REG_OFF(KDIREG_CR3)(%rsp), %r11 + movq %r11, %cr3 + +no_restore_cr3: movq REG_OFF(KDIREG_TRAPNO)(%rsp), %rdi + cmpq $T_SGLSTP, %rdi - je 1f + je kdi_pass_dbgtrap cmpq $T_BPTFLT, %rdi - je 2f + je kdi_pass_brktrap cmpq $T_DBGENTR, %rdi - je 3f + je kdi_pass_invaltrap /* * Hmm, unknown handler. Somebody forgot to update this when they * added a new trap interposition... try to drop back into kmdb. @@ -609,13 +639,13 @@ XPV_TRAP_PUSH; \ jmp %cs:name -1: +kdi_pass_dbgtrap: CALL_TRAP_HANDLER(dbgtrap) /*NOTREACHED*/ -2: +kdi_pass_brktrap: CALL_TRAP_HANDLER(brktrap) /*NOTREACHED*/ -3: +kdi_pass_invaltrap: CALL_TRAP_HANDLER(invaltrap) /*NOTREACHED*/ diff --git a/usr/src/uts/intel/kdi/kdi_idt.c b/usr/src/uts/intel/kdi/kdi_idt.c index d801588954..6ea4681bce 100644 --- a/usr/src/uts/intel/kdi/kdi_idt.c +++ b/usr/src/uts/intel/kdi/kdi_idt.c @@ -164,8 +164,8 @@ struct idt_description { { T_GPFLT, 0, kdi_traperr13, NULL }, { T_PGFLT, 0, kdi_traperr14, NULL }, { 15, 0, kdi_invaltrap, NULL }, - { T_EXTERRFLT, 0, kdi_trap16, NULL }, - { T_ALIGNMENT, 0, kdi_traperr17, NULL }, + { T_EXTERRFLT, 0, kdi_trap16, NULL }, + { T_ALIGNMENT, 0, kdi_traperr17, NULL }, { T_MCE, 0, kdi_trap18, NULL }, { T_SIMDFPE, 0, kdi_trap19, NULL }, { T_DBGENTR, 0, kdi_trap20, NULL }, @@ -366,14 +366,16 @@ kdi_deactivate(void) } /* - * We receive all breakpoints and single step traps. Some of them, - * including those from userland and those induced by DTrace providers, - * are intended for the kernel, and must be processed there. We adopt - * this ours-until-proven-otherwise position due to the painful - * consequences of sending the kernel an unexpected breakpoint or - * single step. Unless someone can prove to us that the kernel is - * prepared to handle the trap, we'll assume there's a problem and will - * give the user a chance to debug it. + * We receive all breakpoints and single step traps. Some of them, including + * those from userland and those induced by DTrace providers, are intended for + * the kernel, and must be processed there. We adopt this + * ours-until-proven-otherwise position due to the painful consequences of + * sending the kernel an unexpected breakpoint or single step. Unless someone + * can prove to us that the kernel is prepared to handle the trap, we'll assume + * there's a problem and will give the user a chance to debug it. + * + * If we return 2, then the calling code should restore the trap-time %cr3: that + * is, it really is a kernel-originated trap. */ int kdi_trap_pass(kdi_cpusave_t *cpusave) @@ -390,7 +392,7 @@ kdi_trap_pass(kdi_cpusave_t *cpusave) if (tt == T_BPTFLT && kdi_dtrace_get_state() == KDI_DTSTATE_DTRACE_ACTIVE) - return (1); + return (2); /* * See the comments in the kernel's T_SGLSTP handler for why we need to |