diff options
Diffstat (limited to 'usr/src/uts/i86pc/os/trap.c')
-rw-r--r-- | usr/src/uts/i86pc/os/trap.c | 259 |
1 file changed, 151 insertions, 108 deletions
diff --git a/usr/src/uts/i86pc/os/trap.c b/usr/src/uts/i86pc/os/trap.c index 2d0a093435..5316babf1a 100644 --- a/usr/src/uts/i86pc/os/trap.c +++ b/usr/src/uts/i86pc/os/trap.c @@ -96,6 +96,9 @@ #include <sys/bootinfo.h> #include <sys/promif.h> #include <sys/mach_mmu.h> +#if defined(__xpv) +#include <sys/hypervisor.h> +#endif #define USER 0x10000 /* user-mode flag added to trap type */ @@ -155,20 +158,13 @@ static void dump_ttrace(void); #endif /* TRAPTRACE */ static void dumpregs(struct regs *); static void showregs(uint_t, struct regs *, caddr_t); -static void dump_tss(void); static int kern_gpfault(struct regs *); -struct trap_info { - struct regs *trap_regs; - uint_t trap_type; - caddr_t trap_addr; -}; - /*ARGSUSED*/ static int die(uint_t type, struct regs *rp, caddr_t addr, processorid_t cpuid) { - struct trap_info ti; + struct panic_trap_info ti; const char *trap_name, *trap_mnemonic; if (type < TRAP_TYPES) { @@ -493,9 +489,9 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) errcode &= ~PF_ERR_PROT; } else { priv_violation = (errcode & PF_ERR_USER) && - !(attr & PROT_USER); + !(attr & PROT_USER); access_violation = (errcode & PF_ERR_WRITE) && - !(attr & PROT_WRITE); + !(attr & PROT_WRITE); if (!priv_violation && !access_violation) goto cleanup; } @@ -780,7 +776,7 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) sz, NULL, rw)) != 0) { if (ta) { do_watch_step(vaddr, sz, rw, - watchcode, rp->r_pc); + watchcode, rp->r_pc); fault_type = F_INVAL; } else { bzero(&siginfo, sizeof (siginfo)); @@ -1089,24 +1085,32 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) break; case T_GPFLT: /* general protection violation */ -#if defined(__amd64) +#if defined(__amd64) || defined(__xpv) /* * On amd64, we can get a #gp from referencing addresses - * in the virtual address hole e.g. from a copyin - * or in update_sregs while updating user semgent registers. + * in the virtual address hole e.g. 
from a copyin or in + * update_sregs while updating user segment registers. + * + * On the 32-bit hypervisor we could also generate one in + * mfn_to_pfn by reaching around or into where the hypervisor + * lives which is protected by segmentation. */ /* * If we're under on_trap() protection (see <sys/ontrap.h>), - * set ot_trap and longjmp back to the on_trap() call site. + * set ot_trap and longjmp back to the on_trap() call site + * for OT_DATA_ACCESS or OT_SEGMENT_ACCESS. */ if (ct->t_ontrap != NULL) { - if (ct->t_ontrap->ot_prot & OT_DATA_ACCESS) - ct->t_ontrap->ot_trap |= OT_DATA_ACCESS; + int ttype = ct->t_ontrap->ot_prot & + (OT_DATA_ACCESS | OT_SEGMENT_ACCESS); - if (ct->t_ontrap->ot_prot & OT_SEGMENT_ACCESS) - ct->t_ontrap->ot_trap |= OT_SEGMENT_ACCESS; - longjmp(&curthread->t_ontrap->ot_jmpbuf); + if (ttype != 0) { + ct->t_ontrap->ot_trap |= ttype; + if (tudebug) + showregs(type, rp, (caddr_t)0); + longjmp(&curthread->t_ontrap->ot_jmpbuf); + } } /* @@ -1126,7 +1130,7 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) goto cleanup; } /*FALLTHROUGH*/ -#endif +#endif /* __amd64 || __xpv */ case T_SEGFLT: /* segment not present fault */ #if defined(__amd64) /* @@ -1138,6 +1142,8 @@ trap(struct regs *rp, caddr_t addr, processorid_t cpuid) if (ct->t_ontrap != NULL && ct->t_ontrap->ot_prot & OT_SEGMENT_ACCESS) { ct->t_ontrap->ot_trap |= OT_SEGMENT_ACCESS; + if (tudebug) + showregs(type, rp, (caddr_t)0); longjmp(&curthread->t_ontrap->ot_jmpbuf); } #endif /* __amd64 */ @@ -1608,11 +1614,14 @@ showregs(uint_t type, struct regs *rp, caddr_t addr) (uint_t)getcr0(), FMT_CR0, (uint_t)getcr4(), FMT_CR4); #endif /* __lint */ + printf("cr2: %lx", getcr2()); +#if !defined(__xpv) + printf("cr3: %lx", getcr3()); #if defined(__amd64) - printf("cr2: %lx cr3: %lx cr8: %lx\n", getcr2(), getcr3(), getcr8()); -#elif defined(__i386) - printf("cr2: %lx cr3: %lx\n", getcr2(), getcr3()); + printf("cr8: %lx\n", getcr8()); +#endif #endif + printf("\n"); dumpregs(rp); splx(s); 
@@ -1657,6 +1666,82 @@ dumpregs(struct regs *rp) } /* + * Test to see if the instruction is iret on i386 or iretq on amd64. + * + * On the hypervisor we can only test for nopop_sys_rtt_syscall. If true + * then we are in the context of hypervisor's failsafe handler because it + * tried to iret and failed due to a bad selector. See xen_failsafe_callback. + */ +static int +instr_is_iret(caddr_t pc) +{ + +#if defined(__xpv) + extern void nopop_sys_rtt_syscall(void); + return ((pc == (caddr_t)nopop_sys_rtt_syscall) ? 1 : 0); + +#else + +#if defined(__amd64) + static const uint8_t iret_insn[2] = { 0x48, 0xcf }; /* iretq */ + +#elif defined(__i386) + static const uint8_t iret_insn[1] = { 0xcf }; /* iret */ +#endif /* __i386 */ + return (bcmp(pc, iret_insn, sizeof (iret_insn)) == 0); + +#endif /* __xpv */ +} + +#if defined(__i386) + +/* + * Test to see if the instruction is part of __SEGREGS_POP + * + * Note carefully the appallingly awful dependency between + * the instruction sequence used in __SEGREGS_POP and these + * instructions encoded here. + */ +static int +instr_is_segregs_pop(caddr_t pc) +{ + static const uint8_t movw_0_esp_gs[4] = { 0x8e, 0x6c, 0x24, 0x0 }; + static const uint8_t movw_4_esp_fs[4] = { 0x8e, 0x64, 0x24, 0x4 }; + static const uint8_t movw_8_esp_es[4] = { 0x8e, 0x44, 0x24, 0x8 }; + static const uint8_t movw_c_esp_ds[4] = { 0x8e, 0x5c, 0x24, 0xc }; + + if (bcmp(pc, movw_0_esp_gs, sizeof (movw_0_esp_gs)) == 0 || + bcmp(pc, movw_4_esp_fs, sizeof (movw_4_esp_fs)) == 0 || + bcmp(pc, movw_8_esp_es, sizeof (movw_8_esp_es)) == 0 || + bcmp(pc, movw_c_esp_ds, sizeof (movw_c_esp_ds)) == 0) + return (1); + + return (0); +} + +#endif /* __i386 */ + +/* + * Test to see if the instruction is part of _sys_rtt. + * + * Again on the hypervisor if we try to IRET to user land with a bad code + * or stack selector we will get vectored through xen_failsafe_callback. 
+ * In which case we assume we got here via _sys_rtt since we only allow + * IRET to user land to take place in _sys_rtt. + */ +static int +instr_is_sys_rtt(caddr_t pc) +{ + extern void _sys_rtt(), _sys_rtt_end(); + + if ((uintptr_t)pc < (uintptr_t)_sys_rtt || + (uintptr_t)pc > (uintptr_t)_sys_rtt_end) + return (0); + + return (1); +} + +/* * Handle #gp faults in kernel mode. * * One legitimate way this can happen is if we attempt to update segment @@ -1688,33 +1773,12 @@ kern_gpfault(struct regs *rp) caddr_t pc = (caddr_t)rp->r_pc; int v; - extern void _sys_rtt(), sr_sup(); - -#if defined(__amd64) - static const uint8_t iretq_insn[2] = { 0x48, 0xcf }; - -#elif defined(__i386) - static const uint8_t iret_insn[1] = { 0xcf }; - - /* - * Note carefully the appallingly awful dependency between - * the instruction sequence used in __SEGREGS_POP and these - * instructions encoded here. - * - * XX64 Add some commentary to locore.s/privregs.h to document this. - */ - static const uint8_t movw_0_esp_gs[4] = { 0x8e, 0x6c, 0x24, 0x0 }; - static const uint8_t movw_4_esp_fs[4] = { 0x8e, 0x64, 0x24, 0x4 }; - static const uint8_t movw_8_esp_es[4] = { 0x8e, 0x44, 0x24, 0x8 }; - static const uint8_t movw_c_esp_ds[4] = { 0x8e, 0x5c, 0x24, 0xc }; -#endif /* - * if we're not an lwp, or the pc range is outside _sys_rtt, then - * we should immediately be die()ing horribly + * if we're not an lwp, or in the case of running native the + * pc range is outside _sys_rtt, then we should immediately + * be die()ing horribly. */ - if (lwp == NULL || - (uintptr_t)pc < (uintptr_t)_sys_rtt || - (uintptr_t)pc > (uintptr_t)sr_sup) + if (lwp == NULL || !instr_is_sys_rtt(pc)) return (1); /* @@ -1725,12 +1789,9 @@ kern_gpfault(struct regs *rp) * based on the order in which the stack is deconstructed in * _sys_rtt. Ew. */ - -#if defined(__amd64) - - if (bcmp(pc, iretq_insn, sizeof (iretq_insn)) == 0) { + if (instr_is_iret(pc)) { /* - * We took the #gp while trying to perform the iretq. 
+ * We took the #gp while trying to perform the IRET. * This means that either %cs or %ss are bad. * All we know for sure is that most of the general * registers have been restored, including the @@ -1758,58 +1819,35 @@ kern_gpfault(struct regs *rp) ASSERT(trp->r_pc == lwptoregs(lwp)->r_pc); ASSERT(trp->r_err == rp->r_err); - } -#elif defined(__i386) - if (bcmp(pc, iret_insn, sizeof (iret_insn)) == 0) { - /* - * We took the #gp while trying to perform the iret. - * This means that either %cs or %ss are bad. - * All we know for sure is that most of the general - * registers have been restored, including the - * segment registers, and all we have left on the - * topmost part of the lwp's stack are the registers that - * the iret was unable to consume. - * - * All the rest of the state was crushed by the #gp - * which pushed -its- registers atop our old save area - * (because we had to decrement the stack pointer, sigh) so - * all that we can try and do is to reconstruct the - * crushed frame from the #gp trap frame itself. - */ - trp = &tmpregs; - trp->r_ss = lwptoregs(lwp)->r_ss; - trp->r_sp = lwptoregs(lwp)->r_sp; - trp->r_ps = lwptoregs(lwp)->r_ps; - trp->r_cs = lwptoregs(lwp)->r_cs; - trp->r_pc = lwptoregs(lwp)->r_pc; - bcopy(rp, trp, offsetof(struct regs, r_pc)); + } - ASSERT(trp->r_pc == lwptoregs(lwp)->r_pc); - ASSERT(trp->r_err == rp->r_err); +#if defined(__amd64) + if (trp == NULL && lwp->lwp_pcb.pcb_rupdate != 0) { - } else { /* - * Segment registers are reloaded in _sys_rtt - * via the following sequence: - * - * movw 0(%esp), %gs - * movw 4(%esp), %fs - * movw 8(%esp), %es - * movw 12(%esp), %ds - * addl $16, %esp + * This is the common case -- we're trying to load + * a bad segment register value in the only section + * of kernel code that ever loads segment registers. * - * Thus if any of them fault, we know the user - * registers are left unharmed on the stack. 
+ * We don't need to do anything at this point because + * the pcb contains all the pending segment register + * state, and the regs are still intact because we + * didn't adjust the stack pointer yet. Given the fidelity + * of all this, we could conceivably send a signal + * to the lwp, rather than core-ing. */ - if (bcmp(pc, movw_0_esp_gs, sizeof (movw_0_esp_gs)) == 0 || - bcmp(pc, movw_4_esp_fs, sizeof (movw_4_esp_fs)) == 0 || - bcmp(pc, movw_8_esp_es, sizeof (movw_8_esp_es)) == 0 || - bcmp(pc, movw_c_esp_ds, sizeof (movw_c_esp_ds)) == 0) - trp = lwptoregs(lwp); + trp = lwptoregs(lwp); + ASSERT((caddr_t)trp == (caddr_t)rp->r_sp); } -#endif /* __amd64 */ + +#elif defined(__i386) + + if (trp == NULL && instr_is_segregs_pop(pc)) + trp = lwptoregs(lwp); + +#endif /* __i386 */ if (trp == NULL) return (1); @@ -1831,13 +1869,14 @@ kern_gpfault(struct regs *rp) if ((caddr_t)trp != (caddr_t)lwptoregs(lwp)) bcopy(trp, lwptoregs(lwp), sizeof (*trp)); + mutex_enter(&p->p_lock); lwp->lwp_cursig = SIGSEGV; mutex_exit(&p->p_lock); /* - * Terminate all LWPs but don't discard them. If another lwp beat us to - * the punch by calling exit(), evaporate now. + * Terminate all LWPs but don't discard them. If another lwp beat + * us to the punch by calling exit(), evaporate now. 
*/ proc_is_exiting(p); if (exitlwps(1) != 0) { @@ -1862,6 +1901,7 @@ kern_gpfault(struct regs *rp) * dump_tss() - Display the TSS structure */ +#if !defined(__xpv) #if defined(__amd64) static void @@ -1909,11 +1949,12 @@ dump_tss(void) } #endif /* __amd64 */ +#endif /* !__xpv */ #if defined(TRAPTRACE) -int ttrace_nrec = 0; /* number of records to dump out */ -int ttrace_dump_nregs = 5; /* dump out this many records with regs too */ +int ttrace_nrec = 10; /* number of records to dump out */ +int ttrace_dump_nregs = 0; /* dump out this many records with regs too */ /* * Dump out the last ttrace_nrec traptrace records on each CPU @@ -1928,12 +1969,12 @@ dump_ttrace(void) int n = NCPU; #if defined(__amd64) const char banner[] = - "\ncpu address timestamp " - "type vc handler pc\n"; + "\ncpu address timestamp " + "type vc handler pc\n"; const char fmt1[] = "%3d %016lx %12llx "; #elif defined(__i386) const char banner[] = - "\ncpu address timestamp type vc handler pc\n"; + "\ncpu address timestamp type vc handler pc\n"; const char fmt1[] = "%3d %08lx %12llx "; #endif const char fmt2[] = "%4s %3x "; @@ -2139,20 +2180,22 @@ trap_trace_get_traceptr(uint8_t marker, ulong_t pc, ulong_t sp) #endif /* TRAPTRACE */ void -panic_showtrap(struct trap_info *tip) +panic_showtrap(struct panic_trap_info *tip) { showregs(tip->trap_type, tip->trap_regs, tip->trap_addr); #if defined(TRAPTRACE) dump_ttrace(); -#endif /* TRAPTRACE */ +#endif +#if !defined(__xpv) if (tip->trap_type == T_DBLFLT) dump_tss(); +#endif } void -panic_savetrap(panic_data_t *pdp, struct trap_info *tip) +panic_savetrap(panic_data_t *pdp, struct panic_trap_info *tip) { panic_saveregs(pdp, tip->trap_regs); } |