summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/os/panic.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/os/panic.c')
-rw-r--r--usr/src/uts/common/os/panic.c398
1 files changed, 398 insertions, 0 deletions
diff --git a/usr/src/uts/common/os/panic.c b/usr/src/uts/common/os/panic.c
new file mode 100644
index 0000000000..267812b627
--- /dev/null
+++ b/usr/src/uts/common/os/panic.c
@@ -0,0 +1,398 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * When the operating system detects that it is in an invalid state, a panic
+ * is initiated in order to minimize potential damage to user data and to
+ * facilitate debugging. There are three major tasks to be performed in
+ * a system panic: recording information about the panic in memory (and thus
+ * making it part of the crash dump), synchronizing the file systems to
+ * preserve user file data, and generating the crash dump. We define the
+ * system to be in one of four states with respect to the panic code:
+ *
+ * CALM - the state of the system prior to any thread initiating a panic
+ *
+ * QUIESCE - the state of the system when the first thread to initiate
+ * a system panic records information about the cause of the panic
+ * and renders the system quiescent by stopping other processors
+ *
+ * SYNC - the state of the system when we synchronize the file systems
+ *
+ * DUMP - the state of the system when we generate the crash dump
+ *
+ * The transitions between these states are irreversible: once we begin
+ * panicking, we only make one attempt to perform the actions associated with
+ * each state.
+ *
+ * The panic code itself must be re-entrant because actions taken during any
+ * state may lead to another system panic. Additionally, any Solaris
+ * thread may initiate a panic at any time, and so we must have synchronization
+ * between threads which attempt to initiate a state transition simultaneously.
+ * The panic code makes use of a special locking primitive, a trigger, to
+ * perform this synchronization. A trigger is simply a word which is set
+ * atomically and can only be set once. We declare three triggers, one for
+ * each transition between the four states. When a thread enters the panic
+ * code it attempts to set each trigger; if it fails it moves on to the
+ * next trigger. A special case is the first trigger: if two threads race
+ * to perform the transition to QUIESCE, the losing thread may execute before
+ * the winner has a chance to stop its CPU. To solve this problem, we have
+ * the loser look ahead to see if any later trigger (or panicstr) is set;
+ * if none is, it presumes the winner's panic is still underway and simply
+ * spins. Unfortunately, since we are panicking, it is not possible to know
+ * this with absolute certainty.
+ *
+ * There are two common reasons for re-entering the panic code once a panic
+ * has been initiated: (1) after we debug_enter() at the end of QUIESCE,
+ * the operator may type "sync" instead of "go", and the PROM's sync callback
+ * routine will invoke panic(); (2) if the clock routine decides that sync
+ * or dump is not making progress, it will invoke panic() to force a timeout.
+ * The design assumes that a third possibility, another thread causing an
+ * unrelated panic while sync or dump is still underway, is extremely unlikely.
+ * If this situation occurs, we may end up triggering dump while sync is
+ * still in progress. This third case is considered extremely unlikely because
+ * all other CPUs are stopped and low-level interrupts have been blocked.
+ *
+ * The panic code is entered via a call directly to the vpanic() function,
+ * or its varargs wrappers panic() and cmn_err(9F). The vpanic routine
+ * is implemented in assembly language to record the current machine
+ * registers, attempt to set the trigger for the QUIESCE state, and
+ * if successful, switch stacks on to the panic_stack before calling into
+ * the common panicsys() routine. The first thread to initiate a panic
+ * is allowed to make use of the reserved panic_stack so that executing
+ * the panic code itself does not overwrite valuable data on that thread's
+ * stack *ahead* of the current stack pointer. This data will be preserved
+ * in the crash dump and may prove invaluable in determining what this
+ * thread has previously been doing. The first thread, saved in panic_thread,
+ * is also responsible for stopping the other CPUs as quickly as possible,
+ * and then setting the various panic_* variables. Most important among
+ * these is panicstr, which allows threads to subsequently bypass held
+ * locks so that we can proceed without ever blocking. We must stop the
+ * other CPUs *prior* to setting panicstr in case threads running there are
+ * currently spinning to acquire a lock; we want that state to be preserved.
+ * Every thread which initiates a panic has its T_PANIC flag set so we can
+ * identify all such threads in the crash dump.
+ *
+ * The panic_thread is also allowed to make use of the special memory buffer
+ * panicbuf, which on machines with appropriate hardware is preserved across
+ * reboots. We allow the panic_thread to store its register set and panic
+ * message in this buffer, so even if we fail to obtain a crash dump we will
+ * be able to examine the machine after reboot and determine some of the
+ * state at the time of the panic. If we do get a dump, the panic buffer
+ * data is structured so that a debugger can easily consume the information
+ * therein (see <sys/panic.h>).
+ *
+ * Each platform or architecture is required to implement the functions
+ * panic_savetrap() to record trap-specific information to panicbuf,
+ * panic_saveregs() to record a register set to panicbuf, panic_stopcpus()
+ * to halt all CPUs but the panicking CPU, panic_quiesce_hw() to perform
+ * miscellaneous platform-specific tasks *after* panicstr is set,
+ * panic_showtrap() to print trap-specific information to the console,
+ * and panic_dump_hw() to perform platform tasks prior to calling dumpsys().
+ *
+ * A Note on Word Formation, courtesy of the Oxford Guide to English Usage:
+ *
+ * Words ending in -c interpose k before suffixes which otherwise would
+ * indicate a soft c, and thus the verb and adjective forms of 'panic' are
+ * spelled "panicked", "panicking", and "panicky" respectively. Use of
+ * the ill-conceived "panicing" and "panic'd" is discouraged.
+ */
+
+#include <sys/types.h>
+#include <sys/varargs.h>
+#include <sys/sysmacros.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/thread.h>
+#include <sys/t_lock.h>
+#include <sys/cred.h>
+#include <sys/systm.h>
+#include <sys/uadmin.h>
+#include <sys/callb.h>
+#include <sys/vfs.h>
+#include <sys/log.h>
+#include <sys/disp.h>
+#include <sys/param.h>
+#include <sys/dumphdr.h>
+#include <sys/ftrace.h>
+#include <sys/reboot.h>
+#include <sys/debug.h>
+#include <sys/stack.h>
+#include <sys/spl.h>
+#include <sys/errorq.h>
+#include <sys/panic.h>
+
+/*
+ * Panic variables which are set once during the QUIESCE state by the
+ * first thread to initiate a panic. These are examined by post-mortem
+ * debugging tools; the inconsistent use of 'panic' versus 'panic_' in
+ * the variable naming is historical and allows legacy tools to work.
+ *
+ * Apart from panic_stack, which is only reserved here, each of these is
+ * assigned in the on_panic_stack branch of panicsys(), after the call to
+ * panic_stopcpus() has returned.
+ */
+#pragma align STACK_ALIGN(panic_stack)
+char panic_stack[PANICSTKSIZE]; /* reserved stack for panic_thread */
+kthread_t *panic_thread; /* first thread to call panicsys() */
+cpu_t panic_cpu; /* copy of cpu_t from first call to panicsys() */
+label_t panic_regs; /* setjmp label from panic_thread (copy of t_pcb) */
+struct regs *panic_reg; /* regs struct from first panicsys() */
+char *volatile panicstr; /* format string to first panicsys() */
+va_list panicargs; /* arguments to first panicsys() (via va_copy) */
+clock_t panic_lbolt; /* lbolt at time of panic */
+int64_t panic_lbolt64; /* lbolt64 at time of panic */
+hrtime_t panic_hrtime; /* hrtime at time of panic */
+timespec_t panic_hrestime; /* hrestime at time of panic */
+int panic_ipl; /* ipl on panic_cpu at time of panic */
+ushort_t panic_schedflag; /* pre-panic t_schedflag for panic_thread */
+cpu_t *panic_bound_cpu; /* pre-panic t_bound_cpu for panic_thread */
+char panic_preempt; /* pre-panic t_preempt for panic_thread */
+
+/*
+ * Panic variables which can be set via /etc/system or patched while
+ * the system is in operation. Again, the stupid names are historic.
+ *
+ * panicsys() reads these after the dump attempt: halt_on_panic selects
+ * mdboot(A_REBOOT, AD_HALT, NULL), otherwise panic_bootfcn and
+ * panic_bootstr are passed to mdboot(). nopanicdebug suppresses both
+ * debug_enter() calls, and in_sync skips the vfs_syncall() step.
+ */
+char *panic_bootstr = NULL; /* mdboot string to use after panic */
+int panic_bootfcn = AD_BOOT; /* mdboot function to use after panic */
+int halt_on_panic = 0; /* halt after dump instead of reboot? */
+int nopanicdebug = 0; /* reboot instead of call debugger? */
+int in_sync = 0; /* skip vfs_syncall() and just dump? */
+
+/*
+ * The do_polled_io flag is set by the panic code to inform the SCSI subsystem
+ * to use polled mode instead of interrupt-driven i/o. panicsys() sets it to 1
+ * immediately before calling vfs_syncall() and again before dumpsys().
+ */
+int do_polled_io = 0;
+
+/*
+ * The panic_forced flag is set by the uadmin A_DUMP code to inform the
+ * panic subsystem that it should not attempt an initial debug_enter.
+ * Only the debug_enter() at the end of QUIESCE tests this flag; the later
+ * debug_enter() taken when the dump trigger is already set does not.
+ */
+int panic_forced = 0;
+
+/*
+ * Triggers for panic state transitions: each is a word handed to
+ * panic_trigger(), which reports whether the caller was the one to set it.
+ * panicsys() sets panic_sync and panic_dump itself; panic_quiesce is not
+ * set in this file's C code (the block comment at the top of the file
+ * attributes that to the assembly vpanic() routine).
+ */
+int panic_quiesce; /* trigger for CALM -> QUIESCE */
+int panic_sync; /* trigger for QUIESCE -> SYNC */
+int panic_dump; /* trigger for SYNC -> DUMP */
+/*
+ * panicsys() - common C portion of the panic path, entered from vpanic().
+ *
+ * format, alist - printf-style panic message and its arguments
+ * rp - register set handed to panic_saveregs() and traceregs()
+ * on_panic_stack - non-zero selects the one-time QUIESCE processing; per
+ * the block comment at the top of this file only the first panicking
+ * thread is switched onto panic_stack (assumed to be what this flag
+ * reports -- confirm against the platform vpanic() implementation)
+ *
+ * When on_panic_stack is set, the panic message and registers are written
+ * to panicbuf, the other CPUs are stopped, the panic_* variables are filled
+ * in, the message and a register trace are printed, and the debugger may be
+ * entered. When it is clear but a panic has already been recorded (either
+ * later trigger or panicstr is set), only the message is printed. When it
+ * is clear and nothing has been recorded yet, the caller lost the QUIESCE
+ * race and goes straight to the spin loop.
+ *
+ * All paths other than the spin case then attempt the file system sync and
+ * the crash dump, each guarded by its trigger so it is started at most once,
+ * and finally call mdboot(). Every path through this function ends in the
+ * infinite loop at "spin", so it never returns to its caller.
+ */
+void
+panicsys(const char *format, va_list alist, struct regs *rp, int on_panic_stack)
+{
+ int s = spl8(); /* s is the pre-panic level, recorded as panic_ipl */
+ kthread_t *t = curthread;
+ cpu_t *cp = CPU;
+
+ caddr_t intr_stack = NULL;
+ uint_t intr_actv; /* set and read only when intr_stack != NULL */
+
+ ushort_t schedflag = t->t_schedflag; /* pre-panic value */
+ cpu_t *bound_cpu = t->t_bound_cpu; /* pre-panic value */
+ char preempt = t->t_preempt; /* pre-panic value */
+
+ (void) setjmp(&t->t_pcb); /* t_pcb is copied to panic_regs below */
+ t->t_flag |= T_PANIC; /* marks panicking threads in the dump */
+
+ t->t_schedflag |= TS_DONT_SWAP;
+ t->t_bound_cpu = cp; /* presumably keeps t on this CPU -- confirm */
+ t->t_preempt++;
+
+ panic_enter_hw(s); /* platform hook; not defined in this file */
+
+ /*
+ * If we're on the interrupt stack and an interrupt thread is available
+ * in this CPU's pool, preserve the interrupt stack by detaching an
+ * interrupt thread and making its stack the intr_stack. The previous
+ * cpu_intr_stack and cpu_intr_actv are kept in the locals so that they
+ * can be written into the panic_cpu copy further down.
+ */
+ if (CPU_ON_INTR(cp) && cp->cpu_intr_thread != NULL) {
+ kthread_t *it = cp->cpu_intr_thread;
+
+ intr_stack = cp->cpu_intr_stack;
+ intr_actv = cp->cpu_intr_actv;
+
+ cp->cpu_intr_stack = thread_stk_init(it->t_stk);
+ cp->cpu_intr_thread = it->t_link;
+
+ /*
+ * Clear only the high level bits of cpu_intr_actv.
+ * We want to indicate that high-level interrupts are
+ * not active without destroying the low-level interrupt
+ * information stored there.
+ */
+ cp->cpu_intr_actv &= ((1 << (LOCK_LEVEL + 1)) - 1);
+ }
+
+ /*
+ * Record one-time panic information and quiesce the other CPUs.
+ * Then print out the panic message and stack trace. The panicbuf
+ * header is filled in first, then either the trap or the register
+ * set is saved, and the formatted message is placed at pd_msgoff.
+ */
+ if (on_panic_stack) {
+ panic_data_t *pdp = (panic_data_t *)panicbuf;
+
+ pdp->pd_version = PANICBUFVERS;
+ pdp->pd_msgoff = sizeof (panic_data_t) - sizeof (panic_nv_t);
+
+ if (t->t_panic_trap != NULL)
+ panic_savetrap(pdp, t->t_panic_trap);
+ else
+ panic_saveregs(pdp, rp);
+
+ (void) vsnprintf(&panicbuf[pdp->pd_msgoff],
+ PANICBUFSIZE - pdp->pd_msgoff, format, alist);
+
+ /*
+ * Call into the platform code to stop the other CPUs.
+ * We currently have all interrupts blocked, and expect that
+ * the platform code will lower ipl only as far as needed to
+ * perform cross-calls, and will acquire as *few* locks as is
+ * possible -- panicstr is not set so we can still deadlock.
+ */
+ panic_stopcpus(cp, t, s);
+
+ panicstr = (char *)format;
+ va_copy(panicargs, alist);
+ panic_lbolt = lbolt;
+ panic_lbolt64 = lbolt64;
+ panic_hrestime = hrestime;
+ panic_hrtime = gethrtime_waitfree();
+ panic_thread = t;
+ panic_regs = t->t_pcb;
+ panic_reg = rp;
+ panic_cpu = *cp; /* structure copy of the cpu_t */
+ panic_ipl = spltoipl(s);
+ panic_schedflag = schedflag;
+ panic_bound_cpu = bound_cpu;
+ panic_preempt = preempt;
+
+ if (intr_stack != NULL) { /* record the pre-detach values */
+ panic_cpu.cpu_intr_stack = intr_stack;
+ panic_cpu.cpu_intr_actv = intr_actv;
+ }
+
+ /*
+ * Lower ipl to 10 to keep clock() from running, but allow
+ * keyboard interrupts to enter the debugger. These callbacks
+ * are executed with panicstr set so they can bypass locks.
+ */
+ splx(ipltospl(CLOCK_LEVEL));
+ panic_quiesce_hw(pdp);
+ (void) FTRACE_STOP();
+ (void) callb_execute_class(CB_CL_PANIC, NULL);
+
+ if (log_intrq != NULL)
+ log_flushq(log_intrq);
+
+ /*
+ * If log_consq has been initialized and syslogd has started,
+ * print any messages in log_consq that haven't been consumed.
+ */
+ if (log_consq != NULL && log_consq != log_backlogq)
+ log_printq(log_consq);
+
+ fm_banner();
+ errorq_panic();
+
+ printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
+ vprintf(format, alist);
+ printf("\n\n");
+
+ if (t->t_panic_trap != NULL) {
+ panic_showtrap(t->t_panic_trap);
+ printf("\n");
+ }
+
+ traceregs(rp);
+ printf("\n");
+
+ if (((boothowto & RB_DEBUG) || obpdebug) &&
+ !nopanicdebug && !panic_forced) {
+ if (dumpvp != NULL) {
+ debug_enter("panic: entering debugger "
+ "(continue to save dump)");
+ } else {
+ debug_enter("panic: entering debugger "
+ "(no dump device, continue to reboot)");
+ }
+ }
+
+ } else if (panic_dump != 0 || panic_sync != 0 || panicstr != NULL) {
+ /* Re-entry: an earlier panicsys() call already got this far. */
+ printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
+ vprintf(format, alist);
+ printf("\n");
+ } else
+ goto spin; /* lost the QUIESCE race; wait to be stopped */
+
+ /*
+ * Prior to performing sync or dump, we make sure that do_polled_io is
+ * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic,
+ * will re-enter panic if we are not making progress with sync or dump.
+ */
+
+ /*
+ * Sync the filesystems. Reset t_cred if not set because much of
+ * the filesystem code depends on CRED() being valid. The sync is
+ * skipped when in_sync is set or when the panic_sync trigger has
+ * already been taken by an earlier pass through this code.
+ */
+ if (!in_sync && panic_trigger(&panic_sync)) {
+ if (t->t_cred == NULL)
+ t->t_cred = kcred;
+ splx(ipltospl(CLOCK_LEVEL));
+ do_polled_io = 1;
+ vfs_syncall();
+ }
+
+ /*
+ * Take the crash dump. If the dump trigger is already set, try to
+ * enter the debugger again before rebooting the system.
+ */
+ if (panic_trigger(&panic_dump)) {
+ panic_dump_hw(s);
+ splx(ipltospl(CLOCK_LEVEL));
+ do_polled_io = 1;
+ dumpsys();
+ } else if (((boothowto & RB_DEBUG) || obpdebug) && !nopanicdebug) {
+ debug_enter("panic: entering debugger (continue to reboot)");
+ } else
+ printf("dump aborted: please record the above information!\n");
+
+ if (halt_on_panic)
+ mdboot(A_REBOOT, AD_HALT, NULL);
+ else
+ mdboot(A_REBOOT, panic_bootfcn, panic_bootstr);
+spin:
+ /*
+ * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning
+ * and unable to jump into the debugger. This point is reached both
+ * by the race loser above and by falling through if mdboot() returns.
+ */
+ splx(MIN(s, ipltospl(CLOCK_LEVEL)));
+ for (;;);
+}
+
+/*
+ * panic() - varargs front end to vpanic(). Gathers the caller's variable
+ * arguments into a va_list and passes them, with the format string, to
+ * vpanic().
+ */
+void
+panic(const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ vpanic(format, ap);
+ va_end(ap);
+}