summaryrefslogtreecommitdiff
path: root/usr/src/cmd/mdb/common/kmdb/kaif_start.c
blob: 17f136b3078a97b81f071974b882e81480ffc84f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * The main CPU-control loops, used to control masters and slaves.
 */

#include <sys/types.h>

#include <kmdb/kaif.h>
#include <kmdb/kaif_start.h>
#include <kmdb/kmdb_asmutil.h>
#include <kmdb/kmdb_dpi_impl.h>
#include <kmdb/kmdb_kdi.h>

#define	KAIF_SLAVE_CMD_SPIN	0
#define	KAIF_SLAVE_CMD_SWITCH	1
#define	KAIF_SLAVE_CMD_RESUME	2
#define	KAIF_SLAVE_CMD_FLUSH	3
#define	KAIF_SLAVE_CMD_REBOOT	4
#if defined(__sparc)
#define	KAIF_SLAVE_CMD_ACK	5
#endif


/*
 * Used to synchronize attempts to set kaif_master_cpuid.  kaif_master_cpuid may
 * be read without kaif_master_lock, and may be written by the current master
 * CPU.
 */
int kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
static uintptr_t kaif_master_lock = 0;

/*
 * Used to ensure that all CPUs leave the debugger together. kaif_loop_lock must
 * be held to write kaif_looping, but need not be held to read it.
 */
static volatile uint_t kaif_looping;
static uintptr_t kaif_loop_lock;

static volatile int kaif_slave_cmd;
static volatile int kaif_slave_tgt;	/* target cpuid for CMD_SWITCH */

/*
 * Acquire a simple spin lock: atomically change *lock from 0 to 1,
 * spinning until we succeed, then publish the acquisition before any
 * subsequent stores become visible.
 */
static void
kaif_lock_enter(uintptr_t *lock)
{
	for (;;) {
		if (cas(lock, 0, 1) == 0)
			break;
	}
	membar_producer();
}

/*
 * Release a lock taken with kaif_lock_enter():  clear the lock word and
 * publish the store so other CPUs see the release.
 */
static void
kaif_lock_exit(uintptr_t *lock)
{
	*lock = 0;
	membar_producer();
}

/*
 * The command loop run by the master CPU while the world is stopped.
 * The debugger's trap table is installed for the duration, and the
 * slaves are directed via the shared kaif_slave_cmd/kaif_slave_tgt
 * variables.  Returns a KAIF_CPU_CMD_* value telling the caller how
 * this CPU is to leave the debugger.
 */
static int
kaif_master_loop(kaif_cpusave_t *cpusave)
{
	int notflushed, i;

#if defined(__sparc)
	/* NOTE(review): presumably re-arms the post-PROM entry point. */
	kaif_prom_rearm();
#endif
	kaif_trap_set_debugger();

master_loop:
	switch (kmdb_dpi_reenter()) {
	case KMDB_DPI_CMD_SWITCH_CPU:
		/*
		 * We assume that the target CPU is a valid slave.  There's no
		 * easy way to complain here, so we'll assume that the caller
		 * has done the proper checking.
		 */
		if (kmdb_dpi_switch_target == cpusave->krs_cpu_id)
			break;

		/* Publish the new master's identity before waking it. */
		kaif_slave_tgt = kaif_master_cpuid = kmdb_dpi_switch_target;
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
		membar_producer();

		/*
		 * Switch back to the saved trap table before we switch CPUs --
		 * we need to make sure that only one CPU is on the debugger's
		 * table at a time.
		 */
		kaif_trap_set_saved(cpusave);

		/* Wake the target slave; we return and become a slave. */
		kaif_slave_cmd = KAIF_SLAVE_CMD_SWITCH;

		/* The new master is now awake */
		return (KAIF_CPU_CMD_SWITCH);

	case KMDB_DPI_CMD_RESUME_ALL:
	case KMDB_DPI_CMD_RESUME_UNLOAD:
		/*
		 * Resume everyone, clean up for next entry.
		 */
		kaif_master_cpuid = KAIF_MASTER_CPUID_UNSET;
		membar_producer();
		kaif_slave_cmd = KAIF_SLAVE_CMD_RESUME;

		/* Make sure pending debugger work is serviced after resume. */
		if (kmdb_dpi_work_required())
			kmdb_dpi_wrintr_fire();

		kaif_trap_set_saved(cpusave);

		return (KAIF_CPU_CMD_RESUME);

	case KMDB_DPI_CMD_RESUME_MASTER:
		/*
		 * Single-CPU resume, which is performed on the debugger's
		 * trap table (so no need to switch back).
		 */
		return (KAIF_CPU_CMD_RESUME_MASTER);

	case KMDB_DPI_CMD_FLUSH_CACHES:
		kaif_slave_cmd = KAIF_SLAVE_CMD_FLUSH;

		/*
		 * Wait for the other cpus to finish flushing their caches.
		 * Each slave sets krs_cpu_flushed when done (see
		 * kaif_slave_loop()).
		 */
		do {
			notflushed = 0;
			for (i = 0; i < kaif_ncpusave; i++) {
				kaif_cpusave_t *save = &kaif_cpusave[i];

				if (save->krs_cpu_state ==
				    KAIF_CPU_STATE_SLAVE &&
				    !save->krs_cpu_flushed) {
					notflushed++;
					break;
				}
			}
		} while (notflushed > 0);

		/* All slaves flushed; put them back to spinning. */
		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
		break;

#if defined(__i386) || defined(__amd64)
	case KMDB_DPI_CMD_REBOOT:
		/*
		 * Reboot must be initiated by CPU 0.  I could ask why, but I'm
		 * afraid that I don't want to know the answer.
		 */
		if (cpusave->krs_cpu_id == 0)
			return (KAIF_CPU_CMD_REBOOT);

		kaif_slave_cmd = KAIF_SLAVE_CMD_REBOOT;

		/*
		 * Spin forever, waiting for CPU 0 (apparently a slave) to
		 * reboot the system.
		 */
		for (;;)
			continue;

		/*NOTREACHED*/
		break;
#endif
	}

	goto master_loop;
}

/*
 * The spin loop run by each slave CPU while the world is stopped.  The
 * slave polls kaif_slave_cmd until the master directs it to become the
 * new master (SWITCH), flush its caches (FLUSH), resume (RESUME), or,
 * on x86, reboot.  Returns a KAIF_CPU_CMD_* value telling the caller
 * how this CPU is to leave the loop.
 */
static int
kaif_slave_loop(kaif_cpusave_t *cpusave)
{
	int slavecmd, rv;

#if defined(__sparc)
	/*
	 * If the user elects to drop to OBP from the debugger, some OBP
	 * implementations will cross-call the slaves.  We have to turn
	 * IE back on so we can receive the cross-calls.  If we don't,
	 * some OBP implementations will wait forever.
	 */
	interrupts_on();
#endif

	/* Wait for duty to call */
	for (;;) {
		slavecmd = kaif_slave_cmd;

		if (slavecmd == KAIF_SLAVE_CMD_SWITCH &&
		    kaif_slave_tgt == cpusave->krs_cpu_id) {
			/* We've been chosen as the new master. */
			kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
			cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
			rv = KAIF_CPU_CMD_SWITCH;
			break;

		} else if (slavecmd == KAIF_SLAVE_CMD_FLUSH) {
			/* Flush, then let the master know we're done. */
			kmdb_kdi_flush_caches();
			cpusave->krs_cpu_flushed = 1;
			continue;

#if defined(__i386) || defined(__amd64)
		} else if (slavecmd == KAIF_SLAVE_CMD_REBOOT &&
		    cpusave->krs_cpu_id == 0) {
			/* Only CPU 0 may perform the reboot. */
			rv = KAIF_CPU_CMD_REBOOT;
			break;
#endif

		} else if (slavecmd == KAIF_SLAVE_CMD_RESUME) {
			rv = KAIF_CPU_CMD_RESUME;
			break;
#if defined(__sparc)
		} else if (slavecmd == KAIF_SLAVE_CMD_ACK) {
			/* See kaif_slave_loop_barrier(). */
			cpusave->krs_cpu_acked = 1;
		} else if (cpusave->krs_cpu_acked &&
			slavecmd == KAIF_SLAVE_CMD_SPIN) {
			/* The barrier is over; clear the ack for next time. */
			cpusave->krs_cpu_acked = 0;
#endif
		}
	}

#if defined(__sparc)
	interrupts_off();
#endif

	return (rv);
}

/*
 * Called on debugger entry when no master has yet been chosen.  The
 * first CPU to take kaif_master_lock becomes the master and stops the
 * other CPUs (sending them into kaif_slave_entry); latecomers find
 * kaif_master_cpuid already set and mark themselves as slaves.
 */
static void
kaif_select_master(kaif_cpusave_t *cpusave)
{
	kaif_lock_enter(&kaif_master_lock);

	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
		/* This is the master. */
		kaif_master_cpuid = cpusave->krs_cpu_id;
		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
		kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;

		/* Publish the stores above before stopping the other CPUs. */
		membar_producer();

		kmdb_kdi_stop_other_cpus(cpusave->krs_cpu_id,
		    kaif_slave_entry);

	} else {
		/* The master was already chosen - go be a slave */
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
		membar_producer();
	}

	kaif_lock_exit(&kaif_master_lock);
}

/*
 * The common entry point for every CPU entering the debugger.  Selects
 * (or recognizes) the master CPU, runs the appropriate master or slave
 * loop -- repeating across CPU switches -- and implements the exit
 * barrier that keeps all CPUs together on resume.  Returns the
 * KAIF_CPU_CMD_* value describing how this CPU is to leave.
 */
int
kaif_main_loop(kaif_cpusave_t *cpusave)
{
	int cmd;

	if (kaif_master_cpuid == KAIF_MASTER_CPUID_UNSET) {
		if (!kmdb_dpi_resume_requested &&
		    kmdb_kdi_get_unload_request()) {
			/*
			 * Special case: Unload requested before first debugger
			 * entry.  Don't stop the world, as there's nothing to
			 * clean up that can't be handled by the running kernel.
			 */
			cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;
			return (KAIF_CPU_CMD_RESUME);
		}

		kaif_select_master(cpusave);

#ifdef __sparc
		if (kaif_master_cpuid == cpusave->krs_cpu_id) {
			/*
			 * Everyone has arrived, so we can disarm the post-PROM
			 * entry point.
			 */
			*kaif_promexitarmp = 0;
			membar_producer();
		}
#endif
	} else if (kaif_master_cpuid == cpusave->krs_cpu_id) {
		/* A master was already chosen, and it's us. */
		cpusave->krs_cpu_state = KAIF_CPU_STATE_MASTER;
	} else {
		cpusave->krs_cpu_state = KAIF_CPU_STATE_SLAVE;
	}

	/* Reset per-entry flush state (see KMDB_DPI_CMD_FLUSH_CACHES). */
	cpusave->krs_cpu_flushed = 0;

	kaif_lock_enter(&kaif_loop_lock);
	kaif_looping++;
	kaif_lock_exit(&kaif_loop_lock);

	/*
	 * We know who the master and slaves are, so now they can go off
	 * to their respective loops.
	 */
	do {
		if (kaif_master_cpuid == cpusave->krs_cpu_id)
			cmd = kaif_master_loop(cpusave);
		else
			cmd = kaif_slave_loop(cpusave);
	} while (cmd == KAIF_CPU_CMD_SWITCH);

	kaif_lock_enter(&kaif_loop_lock);
	kaif_looping--;
	kaif_lock_exit(&kaif_loop_lock);

	cpusave->krs_cpu_state = KAIF_CPU_STATE_NONE;

	if (cmd == KAIF_CPU_CMD_RESUME) {
		/*
		 * By this point, the master has directed the slaves to resume,
		 * and everyone is making their way to this point.  We're going
		 * to block here until all CPUs leave the master and slave
		 * loops.  When all have arrived, we'll turn them all loose.
		 * This barrier is required for two reasons:
		 *
		 * 1. There exists a race condition whereby a CPU could reenter
		 *    the debugger while another CPU is still in the slave loop
		 *    from this debugger entry.  This usually happens when the
		 *    current master releases the slaves, and makes it back to
		 *    the world before the slaves notice the release.  The
		 *    former master then triggers a debugger entry, and attempts
		 *    to stop the slaves for this entry before they've even
		 *    resumed from the last one.  When the slaves arrive here,
		 *    they'll have re-disabled interrupts, and will thus ignore
		 *    cross-calls until they finish resuming.
		 *
		 * 2. At the time of this writing, there exists a SPARC bug that
		 *    causes an apparently unsolicited interrupt vector trap
		 *    from OBP to one of the slaves.  This wouldn't normally be
		 *    a problem but for the fact that the cross-called CPU
		 *    encounters some sort of failure while in OBP.  OBP
		 *    recovers by executing the debugger-hook word, which sends
		 *    the slave back into the debugger, triggering a debugger
		 *    fault.  This problem seems to only happen during resume,
		 *    the result being that all CPUs save for the cross-called
		 *    one make it back into the world, while the cross-called
		 *    one is stuck at the debugger fault prompt.  Leave the
		 *    world in that state too long, and you'll get a mondo
		 *    timeout panic.  If we hold everyone here, we can give the
		 *    user a chance to trigger a panic for further analysis.
		 *    To trigger the bug, "pool_unlock:b :c" and "while : ; do
		 *    psrset -p ; done".
		 *
		 * When the second item is fixed, the barrier can move into
		 * kaif_select_master(), immediately prior to the setting of
		 * kaif_master_cpuid.
		 */
		while (kaif_looping != 0)
			continue;
	}

	return (cmd);
}


#if defined(__sparc)

static int slave_loop_barrier_failures = 0;	/* for debug */

/*
 * There exists a race condition, observed on some platforms, in which
 * the kmdb master cpu exits to OBP via prom_enter_mon (e.g. the "$q"
 * command) and then later re-enters kmdb (typing "go") while the slaves
 * are still proceeding from the OBP idle-loop back to the kmdb slave
 * loop.  The problem arises when the master cpu, now back in kmdb,
 * proceeds to re-enter OBP (e.g. doing a prom_read() from the kmdb main
 * loop) while the slaves are still trying to get out of (the previous
 * trip in) OBP into the safety of the kmdb slave loop.  This routine
 * forces the slaves to explicitly acknowledge that they are back in the
 * slave loop.  The master cpu can call this routine to ensure that all
 * slave cpus are back in the slave loop before proceeding.
 */
/*
 * Block (bounded) until every CPU in the slave state has acknowledged,
 * via krs_cpu_acked, that it is spinning in kaif_slave_loop().  Gives
 * up after kaif_ncpusave delays of 200us each, recording the failure in
 * slave_loop_barrier_failures for post-mortem debugging.  Called by the
 * master CPU only.
 */
void
kaif_slave_loop_barrier(void)
{
	extern void kdi_usecwait(clock_t);
	int i;
	int not_acked;
	int timeout_count = 0;

	/* Ask the slaves to set krs_cpu_acked (see kaif_slave_loop()). */
	kaif_slave_cmd = KAIF_SLAVE_CMD_ACK;

	/*
	 * Wait for slave cpus to explicitly acknowledge
	 * that they are spinning in the slave loop.
	 */
	do {
		not_acked = 0;
		for (i = 0; i < kaif_ncpusave; i++) {
			kaif_cpusave_t *save = &kaif_cpusave[i];

			if (save->krs_cpu_state ==
			    KAIF_CPU_STATE_SLAVE &&
			    !save->krs_cpu_acked) {
				not_acked++;
				break;
			}
		}

		if (not_acked == 0)
			break;

		/*
		 * Play it safe and do a timeout delay.
		 * We will do at most kaif_ncpusave delays before
		 * bailing out of this barrier.
		 */
		kdi_usecwait(200);

	} while (++timeout_count < kaif_ncpusave);

	if (not_acked > 0)
		/*
		 * we cannot establish a barrier with all
		 * the slave cpus coming back from OBP
		 * Record this fact for future debugging
		 */
		slave_loop_barrier_failures++;

	/* Back to normal spinning; slaves will clear their acks. */
	kaif_slave_cmd = KAIF_SLAVE_CMD_SPIN;
}
#endif