/*
 * Source path: root/usr/src/uts/common/os/panic.c
 * Blob: 87910574f5837210fdf064af94f02bc41981d696
 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * When the operating system detects that it is in an invalid state, a panic
 * is initiated in order to minimize potential damage to user data and to
 * facilitate debugging.  There are three major tasks to be performed in
 * a system panic: recording information about the panic in memory (and thus
 * making it part of the crash dump), synchronizing the file systems to
 * preserve user file data, and generating the crash dump.  We define the
 * system to be in one of four states with respect to the panic code:
 *
 * CALM    - the state of the system prior to any thread initiating a panic
 *
 * QUIESCE - the state of the system when the first thread to initiate
 *           a system panic records information about the cause of the panic
 *           and renders the system quiescent by stopping other processors
 *
 * SYNC    - the state of the system when we synchronize the file systems
 *
 * DUMP    - the state of the system when we generate the crash dump
 *
 * The transitions between these states are irreversible: once we begin
 * panicking, we only make one attempt to perform the actions associated with
 * each state.
 *
 * The panic code itself must be re-entrant because actions taken during any
 * state may lead to another system panic.  Additionally, any Solaris
 * thread may initiate a panic at any time, and so we must have synchronization
 * between threads which attempt to initiate a state transition simultaneously.
 * The panic code makes use of a special locking primitive, a trigger, to
 * perform this synchronization.  A trigger is simply a word which is set
 * atomically and can only be set once.  We declare three triggers, one for
 * each transition between the four states.  When a thread enters the panic
 * code it attempts to set each trigger; if it fails it moves on to the
 * next trigger.  A special case is the first trigger: if two threads race
 * to perform the transition to QUIESCE, the losing thread may execute before
 * the winner has a chance to stop its CPU.  To solve this problem, we have
 * the loser look ahead to see if any other triggers are set; if not, it
 * presumes a panic is underway and simply spins.  Unfortunately, since we
 * are panicking, it is not possible to know this with absolute certainty.
 *
 * There are two common reasons for re-entering the panic code once a panic
 * has been initiated: (1) after we debug_enter() at the end of QUIESCE,
 * the operator may type "sync" instead of "go", and the PROM's sync callback
 * routine will invoke panic(); (2) if the clock routine decides that sync
 * or dump is not making progress, it will invoke panic() to force a timeout.
 * The design assumes that a third possibility, another thread causing an
 * unrelated panic while sync or dump is still underway, is extremely unlikely.
 * If this situation occurs, we may end up triggering dump while sync is
 * still in progress.  This third case is considered extremely unlikely because
 * all other CPUs are stopped and low-level interrupts have been blocked.
 *
 * The panic code is entered via a call directly to the vpanic() function,
 * or its varargs wrappers panic() and cmn_err(9F).  The vpanic routine
 * is implemented in assembly language to record the current machine
 * registers, attempt to set the trigger for the QUIESCE state, and
 * if successful, switch stacks on to the panic_stack before calling into
 * the common panicsys() routine.  The first thread to initiate a panic
 * is allowed to make use of the reserved panic_stack so that executing
 * the panic code itself does not overwrite valuable data on that thread's
 * stack *ahead* of the current stack pointer.  This data will be preserved
 * in the crash dump and may prove invaluable in determining what this
 * thread has previously been doing.  The first thread, saved in panic_thread,
 * is also responsible for stopping the other CPUs as quickly as possible,
 * and then setting the various panic_* variables.  Most important among
 * these is panicstr, which allows threads to subsequently bypass held
 * locks so that we can proceed without ever blocking.  We must stop the
 * other CPUs *prior* to setting panicstr in case threads running there are
 * currently spinning to acquire a lock; we want that state to be preserved.
 * Every thread which initiates a panic has its T_PANIC flag set so we can
 * identify all such threads in the crash dump.
 *
 * The panic_thread is also allowed to make use of the special memory buffer
 * panicbuf, which on machines with appropriate hardware is preserved across
 * reboots.  We allow the panic_thread to store its register set and panic
 * message in this buffer, so even if we fail to obtain a crash dump we will
 * be able to examine the machine after reboot and determine some of the
 * state at the time of the panic.  If we do get a dump, the panic buffer
 * data is structured so that a debugger can easily consume the information
 * therein (see <sys/panic.h>).
 *
 * Each platform or architecture is required to implement the functions
 * panic_savetrap() to record trap-specific information to panicbuf,
 * panic_saveregs() to record a register set to panicbuf, panic_stopcpus()
 * to halt all CPUs but the panicking CPU, panic_quiesce_hw() to perform
 * miscellaneous platform-specific tasks *after* panicstr is set,
 * panic_showtrap() to print trap-specific information to the console,
 * and panic_dump_hw() to perform platform tasks prior to calling dumpsys().
 *
 * A Note on Word Formation, courtesy of the Oxford Guide to English Usage:
 *
 * Words ending in -c interpose k before suffixes which otherwise would
 * indicate a soft c, and thus the verb and adjective forms of 'panic' are
 * spelled "panicked", "panicking", and "panicky" respectively.  Use of
 * the ill-conceived "panicing" and "panic'd" is discouraged.
 */

#include <sys/types.h>
#include <sys/varargs.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/t_lock.h>
#include <sys/cred.h>
#include <sys/systm.h>
#include <sys/uadmin.h>
#include <sys/callb.h>
#include <sys/vfs.h>
#include <sys/log.h>
#include <sys/disp.h>
#include <sys/param.h>
#include <sys/dumphdr.h>
#include <sys/ftrace.h>
#include <sys/reboot.h>
#include <sys/debug.h>
#include <sys/stack.h>
#include <sys/spl.h>
#include <sys/errorq.h>
#include <sys/panic.h>
#include <sys/fm/util.h>

/*
 * Panic variables which are set once during the QUIESCE state by the
 * first thread to initiate a panic.  These are examined by post-mortem
 * debugging tools; the inconsistent use of 'panic' versus 'panic_' in
 * the variable naming is historical and allows legacy tools to work.
 *
 * With the exception of panic_stack, every variable below is assigned by
 * panicsys() on its on_panic_stack path, after panic_stopcpus() has been
 * called.  panic_schedflag, panic_bound_cpu and panic_preempt receive the
 * values sampled on entry to panicsys(), i.e. before panicsys() modifies
 * the thread's t_schedflag, t_bound_cpu and t_preempt fields.
 */
#pragma align STACK_ALIGN(panic_stack)
char panic_stack[PANICSTKSIZE];		/* reserved stack for panic_thread */
kthread_t *panic_thread;		/* first thread to call panicsys() */
cpu_t panic_cpu;			/* cpu from first call to panicsys() */
label_t panic_regs;			/* setjmp label from panic_thread */
struct regs *panic_reg;			/* regs struct from first panicsys() */
char *volatile panicstr;		/* format string to first panicsys() */
va_list panicargs;			/* arguments to first panicsys() */
clock_t panic_lbolt;			/* lbolt at time of panic */
int64_t panic_lbolt64;			/* lbolt64 at time of panic */
hrtime_t panic_hrtime;			/* hrtime at time of panic */
timespec_t panic_hrestime;		/* hrestime at time of panic */
int panic_ipl;				/* ipl on panic_cpu at time of panic */
ushort_t panic_schedflag;		/* t_schedflag for panic_thread */
cpu_t *panic_bound_cpu;			/* t_bound_cpu for panic_thread */
char panic_preempt;			/* t_preempt for panic_thread */

/*
 * Panic variables which can be set via /etc/system or patched while
 * the system is in operation.  Again, the stupid names are historic.
 *
 * How panicsys() consumes them:
 *   panic_bootstr, panic_bootfcn - passed to the final mdboot() call when
 *                                  halt_on_panic is zero
 *   halt_on_panic                - if non-zero, mdboot() is called with
 *                                  AD_HALT instead
 *   nopanicdebug                 - if non-zero, neither debug_enter() call
 *                                  site is taken
 *   in_sync                      - if non-zero, the SYNC step (and its
 *                                  panic_sync trigger) is skipped entirely
 */
char *panic_bootstr = NULL;		/* mdboot string to use after panic */
int panic_bootfcn = AD_BOOT;		/* mdboot function to use after panic */
int halt_on_panic = 0;  		/* halt after dump instead of reboot? */
int nopanicdebug = 0;			/* reboot instead of call debugger? */
int in_sync = 0;			/* skip vfs_syncall() and just dump? */

/*
 * The do_polled_io flag is set by the panic code to inform the SCSI subsystem
 * to use polled mode instead of interrupt-driven i/o.  panicsys() sets it to 1
 * immediately before calling vfs_syncall() and again immediately before
 * calling dumpsys(); nothing in this file clears it afterwards.
 */
int do_polled_io = 0;

/*
 * The panic_forced flag is set by the uadmin A_DUMP code to inform the
 * panic subsystem that it should not attempt an initial debug_enter.
 * Only the first debug_enter() in panicsys() (on the on_panic_stack path)
 * tests this flag; the later debug_enter() taken when the dump trigger is
 * already set does not consult it.
 */
int panic_forced = 0;

/*
 * Triggers for panic state transitions.  A trigger is a word that is set
 * atomically and can only be set once; a non-zero value means the
 * corresponding transition has already been claimed by some thread.
 * panicsys() claims panic_sync and panic_dump through panic_trigger().
 * panic_quiesce is not referenced by the C code in this file; per the
 * header comment at the top of the file it is set by vpanic() before
 * panicsys() is called.
 */
int panic_quiesce;			/* trigger for CALM    -> QUIESCE */
int panic_sync;				/* trigger for QUIESCE -> SYNC */
int panic_dump;				/* trigger for SYNC    -> DUMP */

/*
 * panicsys() is the common (platform-independent) body of the panic path.
 * Per the header comment at the top of this file it is called from vpanic()
 * once the machine registers have been captured.
 *
 * Arguments:
 *   format, alist  - printf-style panic message and its arguments
 *   rp             - register set describing the panicking context; it is
 *                    saved via panic_saveregs(), stored in panic_reg, and
 *                    printed with traceregs()
 *   on_panic_stack - non-zero for the first thread to panic (the one that
 *                    won the QUIESCE trigger and is running on panic_stack);
 *                    zero for any thread that entered the panic code later
 *
 * Flow:
 *   1. Raise ipl, mark the thread T_PANIC and pin it to the current CPU.
 *   2. First panicker only: fill in panicbuf, stop the other CPUs, set
 *      panicstr and the other panic_* variables, run the panic callbacks,
 *      flush logs, print the message and registers, and optionally enter
 *      the debugger.
 *      Later panickers: print their message if the panic has visibly
 *      progressed (a later trigger or panicstr is set), otherwise spin.
 *   3. Attempt the SYNC step and then the DUMP step, each guarded by its
 *      trigger so that it is attempted at most once.
 *   4. Call mdboot().  If control ever comes back from mdboot(), or if the
 *      thread was sent to 'spin', loop forever.
 *
 * This function does not return to its caller.
 */
void
panicsys(const char *format, va_list alist, struct regs *rp, int on_panic_stack)
{
	/*
	 * 's' is the value returned by spl8().  It is later handed to the
	 * platform hooks, converted with spltoipl() to produce panic_ipl,
	 * and used to bound the ipl that is restored at 'spin'.
	 */
	int s = spl8();
	kthread_t *t = curthread;
	cpu_t *cp = CPU;

	/*
	 * Originals of the CPU's interrupt stack pointer and active-interrupt
	 * mask; intr_actv is only meaningful when intr_stack is non-NULL.
	 */
	caddr_t intr_stack = NULL;
	uint_t intr_actv;

	/*
	 * Sample the thread fields that are modified just below so that the
	 * pre-panic values can be recorded in panic_schedflag,
	 * panic_bound_cpu and panic_preempt.
	 */
	ushort_t schedflag = t->t_schedflag;
	cpu_t *bound_cpu = t->t_bound_cpu;
	char preempt = t->t_preempt;

	/*
	 * Record a label in t_pcb (the first panicker copies it to
	 * panic_regs) and flag the thread so it can be identified as a
	 * panicking thread in the crash dump.
	 */
	(void) setjmp(&t->t_pcb);
	t->t_flag |= T_PANIC;

	/*
	 * Set TS_DONT_SWAP, bind the thread to the current CPU, and bump
	 * t_preempt -- presumably so that the panicking thread stays put on
	 * this CPU for the remainder of the panic.
	 */
	t->t_schedflag |= TS_DONT_SWAP;
	t->t_bound_cpu = cp;
	t->t_preempt++;

	panic_enter_hw(s);

	/*
	 * If we're on the interrupt stack and an interrupt thread is available
	 * in this CPU's pool, preserve the interrupt stack by detaching an
	 * interrupt thread and making its stack the intr_stack.
	 */
	if (CPU_ON_INTR(cp) && cp->cpu_intr_thread != NULL) {
		kthread_t *it = cp->cpu_intr_thread;

		/* Remember the originals for the panic_cpu snapshot below. */
		intr_stack = cp->cpu_intr_stack;
		intr_actv = cp->cpu_intr_actv;

		/* Unlink 'it' from the pool and adopt its stack. */
		cp->cpu_intr_stack = thread_stk_init(it->t_stk);
		cp->cpu_intr_thread = it->t_link;

		/*
		 * Clear only the high level bits of cpu_intr_actv.
		 * We want to indicate that high-level interrupts are
		 * not active without destroying the low-level interrupt
		 * information stored there.
		 */
		cp->cpu_intr_actv &= ((1 << (LOCK_LEVEL + 1)) - 1);
	}

	/*
	 * Record one-time panic information and quiesce the other CPUs.
	 * Then print out the panic message and stack trace.
	 */
	if (on_panic_stack) {
		panic_data_t *pdp = (panic_data_t *)panicbuf;

		/*
		 * Lay out panicbuf: a panic_data_t header, then the
		 * register/trap data written by the platform code, with the
		 * formatted message placed at pd_msgoff.
		 */
		pdp->pd_version = PANICBUFVERS;
		pdp->pd_msgoff = sizeof (panic_data_t) - sizeof (panic_nv_t);

		if (t->t_panic_trap != NULL)
			panic_savetrap(pdp, t->t_panic_trap);
		else
			panic_saveregs(pdp, rp);

		/*
		 * pd_msgoff is re-read here rather than cached.
		 * NOTE(review): presumably the save routines above advance
		 * it past the data they record -- confirm in platform code.
		 */
		(void) vsnprintf(&panicbuf[pdp->pd_msgoff],
		    PANICBUFSIZE - pdp->pd_msgoff, format, alist);

		/*
		 * Call into the platform code to stop the other CPUs.
		 * We currently have all interrupts blocked, and expect that
		 * the platform code will lower ipl only as far as needed to
		 * perform cross-calls, and will acquire as *few* locks as is
		 * possible -- panicstr is not set so we can still deadlock.
		 */
		panic_stopcpus(cp, t, s);

		/*
		 * The other CPUs have been asked to stop; now publish the
		 * one-time panic state.  panicstr is deliberately set only
		 * after panic_stopcpus() (see the header comment at the top
		 * of this file).
		 */
		panicstr = (char *)format;
		va_copy(panicargs, alist);
		panic_lbolt = lbolt;
		panic_lbolt64 = lbolt64;
		panic_hrestime = hrestime;
		panic_hrtime = gethrtime_waitfree();
		panic_thread = t;
		panic_regs = t->t_pcb;
		panic_reg = rp;
		panic_cpu = *cp;	/* structure copy: snapshot of this CPU */
		panic_ipl = spltoipl(s);
		panic_schedflag = schedflag;
		panic_bound_cpu = bound_cpu;
		panic_preempt = preempt;

		/*
		 * If the interrupt stack was swapped out above, make the
		 * snapshot reflect the original stack and active mask rather
		 * than the replacements installed in *cp.
		 */
		if (intr_stack != NULL) {
			panic_cpu.cpu_intr_stack = intr_stack;
			panic_cpu.cpu_intr_actv = intr_actv;
		}

		/*
		 * Lower ipl to 10 to keep clock() from running, but allow
		 * keyboard interrupts to enter the debugger.  These callbacks
		 * are executed with panicstr set so they can bypass locks.
		 */
		splx(ipltospl(CLOCK_LEVEL));
		panic_quiesce_hw(pdp);
		(void) FTRACE_STOP();
		(void) callb_execute_class(CB_CL_PANIC, NULL);

		if (log_intrq != NULL)
			log_flushq(log_intrq);

		/*
		 * If log_consq has been initialized and syslogd has started,
		 * print any messages in log_consq that haven't been consumed.
		 */
		if (log_consq != NULL && log_consq != log_backlogq)
			log_printq(log_consq);

		fm_banner();
		errorq_panic();

		printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
		vprintf(format, alist);
		printf("\n\n");

		if (t->t_panic_trap != NULL) {
			panic_showtrap(t->t_panic_trap);
			printf("\n");
		}

		traceregs(rp);
		printf("\n");

		/*
		 * Initial debugger entry: only when debugging is enabled
		 * (RB_DEBUG or obpdebug), not suppressed by nopanicdebug,
		 * and the panic was not forced.  The message tells the
		 * operator what continuing will do.
		 */
		if (((boothowto & RB_DEBUG) || obpdebug) &&
		    !nopanicdebug && !panic_forced) {
			if (dumpvp != NULL) {
				debug_enter("panic: entering debugger "
				    "(continue to save dump)");
			} else {
				debug_enter("panic: entering debugger "
				    "(no dump device, continue to reboot)");
			}
		}

	} else if (panic_dump != 0 || panic_sync != 0 || panicstr != NULL) {
		/*
		 * Not the first panicker, but the panic has visibly
		 * progressed (a later trigger or panicstr is set): print this
		 * thread's message and fall through to try the remaining
		 * triggers.
		 */
		printf("\n\rpanic[cpu%d]/thread=%p: ", cp->cpu_id, (void *)t);
		vprintf(format, alist);
		printf("\n");
	} else
		/*
		 * Lost the race for the QUIESCE trigger and nothing further
		 * has been set yet: presume the winner is still getting
		 * started and just spin.
		 */
		goto spin;

	/*
	 * Prior to performing sync or dump, we make sure that do_polled_io is
	 * set, but we'll leave ipl at 10; deadman(), a CY_HIGH_LEVEL cyclic,
	 * will re-enter panic if we are not making progress with sync or dump.
	 */

	/*
	 * Sync the filesystems.  Reset t_cred if not set because much of
	 * the filesystem code depends on CRED() being valid.
	 */
	if (!in_sync && panic_trigger(&panic_sync)) {
		if (t->t_cred == NULL)
			t->t_cred = kcred;
		splx(ipltospl(CLOCK_LEVEL));
		do_polled_io = 1;
		vfs_syncall();
	}

	/*
	 * Take the crash dump.  If the dump trigger is already set, try to
	 * enter the debugger again before rebooting the system.
	 */
	if (panic_trigger(&panic_dump)) {
		panic_dump_hw(s);
		splx(ipltospl(CLOCK_LEVEL));
		do_polled_io = 1;
		dumpsys();
	} else if (((boothowto & RB_DEBUG) || obpdebug) && !nopanicdebug) {
		/* Unlike the initial entry above, panic_forced is not tested. */
		debug_enter("panic: entering debugger (continue to reboot)");
	} else
		printf("dump aborted: please record the above information!\n");

	/*
	 * Halt or reboot according to the tunables.  If mdboot() comes back,
	 * control falls through into the spin loop below.
	 */
	if (halt_on_panic)
		mdboot(A_REBOOT, AD_HALT, NULL, B_FALSE);
	else
		mdboot(A_REBOOT, panic_bootfcn, panic_bootstr, B_FALSE);
spin:
	/*
	 * Restore ipl to at most CLOCK_LEVEL so we don't end up spinning
	 * and unable to jump into the debugger.
	 */
	splx(MIN(s, ipltospl(CLOCK_LEVEL)));
	for (;;);
}

/*
 * panic() is the varargs front end to vpanic(): it gathers the caller's
 * variable arguments into a va_list and passes them, together with the
 * format string, to vpanic().
 */
void
panic(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	vpanic(format, args);
	/*
	 * NOTE(review): vpanic() is presumably not expected to come back
	 * here -- confirm.  va_end() pairs the va_start() above regardless.
	 */
	va_end(args);
}