/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>.
 * Copyright 2017 RackTop Systems.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2021 Oxide Computer Company
 */

#ifndef _SYS_CPUVAR_H
#define	_SYS_CPUVAR_H

#include <sys/thread.h>
#include <sys/sysinfo.h>	/* has cpu_stat_t definition */
#include <sys/disp.h>
#include <sys/processor.h>
#include <sys/kcpc.h>		/* has kcpc_ctx_t definition */

#include <sys/loadavg.h>
#if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP)
#include <sys/machcpuvar.h>
#endif

#include <sys/types.h>
#include <sys/file.h>
#include <sys/bitmap.h>
#include <sys/rwlock.h>
#include <sys/msacct.h>
#if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL) && \
	(defined(__i386) || defined(__amd64))
#include <asm/cpuvar.h>
#endif

#ifdef	__cplusplus
extern "C" {
#endif

struct squeue_set_s;

#define	CPU_CACHE_COHERENCE_SIZE	64

/*
 * For fast event tracing.
 */
struct ftrace_record;
typedef struct ftrace_data {
	int			ftd_state;	/* ftrace flags */
	kmutex_t		ftd_unused;	/* ftrace buffer lock, unused */
	struct ftrace_record	*ftd_cur;	/* current record */
	struct ftrace_record	*ftd_first;	/* first record */
	struct ftrace_record	*ftd_last;	/* last record */
} ftrace_data_t;

struct cyc_cpu;
struct nvlist;

/*
 * Per-CPU data.
 *
 * Be careful adding new members: if they are not the same in all modules (e.g.
 * change size depending on a #define), CTF uniquification can fail to work
 * properly.  Furthermore, this is transitive in that it applies recursively to
 * all types pointed to by cpu_t.
 */
typedef struct cpu {
	processorid_t	cpu_id;			/* CPU number */
	processorid_t	cpu_seqid;	/* sequential CPU id (0..ncpus-1) */
	volatile cpu_flag_t cpu_flags;		/* flags indicating CPU state */
	struct cpu	*cpu_self;		/* pointer to itself */
	kthread_t	*cpu_thread;		/* current thread */
	kthread_t	*cpu_idle_thread;	/* idle thread for this CPU */
	kthread_t	*cpu_pause_thread;	/* pause thread for this CPU */
	klwp_id_t	cpu_lwp;		/* current lwp (if any) */
	klwp_id_t	cpu_fpowner;		/* currently loaded fpu owner */
	struct cpupart	*cpu_part;		/* partition with this CPU */
	struct lgrp_ld	*cpu_lpl;		/* pointer to this cpu's load */
	int		cpu_cache_offset;	/* see kmem.c for details */

	/*
	 * Links to other CPUs.  It is safe to walk these lists if
	 * one of the following is true:
	 *	- cpu_lock held
	 *	- preemption disabled via kpreempt_disable
	 *	- PIL >= DISP_LEVEL
	 *	- acting thread is an interrupt thread
	 *	- all other CPUs are paused
	 */
	struct cpu	*cpu_next;		/* next existing CPU */
	struct cpu	*cpu_prev;		/* prev existing CPU */
	struct cpu	*cpu_next_onln;		/* next online (enabled) CPU */
	struct cpu	*cpu_prev_onln;		/* prev online (enabled) CPU */
	struct cpu	*cpu_next_part;		/* next CPU in partition */
	struct cpu	*cpu_prev_part;		/* prev CPU in partition */
	struct cpu	*cpu_next_lgrp;		/* next CPU in latency group */
	struct cpu	*cpu_prev_lgrp;		/* prev CPU in latency group */
	struct cpu	*cpu_next_lpl;		/* next CPU in lgrp partition */
	struct cpu	*cpu_prev_lpl;

	struct cpu_pg	*cpu_pg;		/* cpu's processor groups */

	void		*cpu_reserved[4];	/* reserved for future use */

	/*
	 * Scheduling variables.
	 */
	disp_t		*cpu_disp;		/* dispatch queue data */
	/*
	 * Note that cpu_disp is set before the CPU is added to the system
	 * and is never modified.  Hence, no additional locking is needed
	 * beyond what's necessary to access the cpu_t structure.
	 */
	char		cpu_runrun;	/* scheduling flag - set to preempt */
	char		cpu_kprunrun;		/* force kernel preemption */
	pri_t		cpu_chosen_level;	/* priority at which cpu */
						/* was chosen for scheduling */
	kthread_t	*cpu_dispthread; /* thread selected for dispatch */
	disp_lock_t	cpu_thread_lock; /* dispatcher lock on current thread */
	uint8_t		cpu_disp_flags;	/* flags used by dispatcher */
	/*
	 * The following field is updated whenever the cpu_dispthread
	 * changes, and also wherever the priority of the current thread
	 * (cpu_dispthread) changes.  This is used by disp_lowpri_cpu().
	 */
	pri_t		cpu_dispatch_pri; /* priority of cpu_dispthread */
	clock_t		cpu_last_swtch;	/* last time switched to new thread */

	/*
	 * Interrupt data.
	 */
	caddr_t		cpu_intr_stack;	/* interrupt stack */
	kthread_t	*cpu_intr_thread; /* interrupt thread list */
	uint_t		cpu_intr_actv;	/* interrupt levels active (bitmask) */
	int		cpu_base_spl;	/* priority for highest rupt active */

	/*
	 * Statistics.
	 */
	cpu_stats_t	cpu_stats;		/* per-CPU statistics */
	struct kstat	*cpu_info_kstat;	/* kstat for cpu info */

	uintptr_t	cpu_profile_pc;	/* kernel PC in profile interrupt */
	uintptr_t	cpu_profile_upc; /* user PC in profile interrupt */
	uintptr_t	cpu_profile_pil; /* PIL when profile interrupted */

	ftrace_data_t	cpu_ftrace;		/* per cpu ftrace data */

	clock_t		cpu_deadman_counter;	/* used by deadman() */
	uint_t		cpu_deadman_countdown;	/* used by deadman() */

	kmutex_t	cpu_cpc_ctxlock; /* protects context for idle thread */
	kcpc_ctx_t	*cpu_cpc_ctx;	/* performance counter context */

	/*
	 * Configuration information for the processor_info system call.
	 */
	processor_info_t cpu_type_info;	/* config info */
	time_t		cpu_state_begin; /* when CPU entered current state */
	char		cpu_cpr_flags;	/* CPR related info */
	struct cyc_cpu	*cpu_cyclic;	/* per cpu cyclic subsystem data */
	struct squeue_set_s *cpu_squeue_set;	/* per cpu squeue set */
	struct nvlist	*cpu_props;	/* pool-related properties */

	krwlock_t	cpu_ft_lock;		/* DTrace: fasttrap lock */
	uintptr_t	cpu_dtrace_caller;	/* DTrace: caller, if any */
	hrtime_t	cpu_dtrace_chillmark;	/* DTrace: chill mark time */
	hrtime_t	cpu_dtrace_chilled;	/* DTrace: total chill time */
	uint64_t	cpu_dtrace_probes;	/* DTrace: total probes fired */
	hrtime_t	cpu_dtrace_nsec;	/* DTrace: ns in dtrace_probe */

	volatile uint16_t cpu_mstate;		/* cpu microstate */
	volatile uint16_t cpu_mstate_gen;	/* generation counter */
	volatile hrtime_t cpu_mstate_start;	/* cpu microstate start time */
	volatile hrtime_t cpu_acct[NCMSTATES];	/* cpu microstate data */
	hrtime_t	cpu_intracct[NCMSTATES]; /* interrupt mstate data */
	hrtime_t	cpu_waitrq;		/* cpu run-queue wait time */
	struct loadavg_s cpu_loadavg;		/* loadavg info for this cpu */

	char		*cpu_idstr;	/* for printing and debugging */
	char		*cpu_brandstr;	/* for printing */

	/*
	 * Sum of all device interrupt weights that are currently directed at
	 * this cpu. Cleared at start of interrupt redistribution.
	 */
	int32_t		cpu_intr_weight;
	void		*cpu_vm_data;

	struct cpu_physid *cpu_physid;	/* physical associations */

	uint64_t	cpu_curr_clock;		/* current clock freq in Hz */
	char		*cpu_supp_freqs;	/* supported freqs in Hz */

	uintptr_t	cpu_cpcprofile_pc;	/* kernel PC in cpc interrupt */
	uintptr_t	cpu_cpcprofile_upc;	/* user PC in cpc interrupt */

	/*
	 * Interrupt load factor used by dispatcher & softcall
	 */
	hrtime_t	cpu_intrlast;   /* total interrupt time (nsec) */
	int		cpu_intrload;   /* interrupt load factor (0-99%) */

	uint_t		cpu_rotor;	/* for cheap pseudo-random numbers */

	struct cu_cpu_info	*cpu_cu_info;	/* capacity & util. info */

	/*
	 * cpu_generation is updated whenever CPU goes on-line or off-line.
	 * Updates to cpu_generation are protected by cpu_lock.
	 *
	 * See CPU_NEW_GENERATION() macro below.
	 */
	volatile uint_t		cpu_generation;	/* tracking on/off-line */

	/*
	 * New members must be added /before/ this member, as the CTF tools
	 * rely on this being the last field before cpu_m, so they can
	 * correctly calculate the offset when synthetically adding the cpu_m
	 * member in objects that do not have it.  This fixup is required for
	 * uniquification to work correctly.
	 */
	uintptr_t	cpu_m_pad;

#if (defined(_KERNEL) || defined(_KMEMUSER)) && defined(_MACHDEP)
	struct machcpu	cpu_m;		/* per architecture info */
#endif
} cpu_t;
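
/*
 * Example: walking the CPU lists.  Per the locking rules documented at the
 * cpu_next member above, holding cpu_lock, disabling preemption, running at
 * PIL >= DISP_LEVEL, or pausing all other CPUs each make traversal safe.
 * A minimal illustrative sketch, not part of this interface
 * (count_online_cpus() is hypothetical):
 *
 *	int
 *	count_online_cpus(void)
 *	{
 *		cpu_t	*cp;
 *		int	n = 0;
 *
 *		kpreempt_disable();
 *		cp = cpu_list;
 *		do {
 *			if (cpu_is_online(cp))
 *				n++;
 *		} while ((cp = cp->cpu_next) != cpu_list);
 *		kpreempt_enable();
 *		return (n);
 *	}
 */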

/*
 * The cpu_core structure consists of per-CPU state available in any context.
 * On some architectures, this may mean that the page(s) containing the
 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
 * is up to the platform to assure that this is performed properly.  Note that
 * the structure is sized to avoid false sharing.
 */
#define	CPUC_SIZE		(sizeof (uint16_t) + sizeof (uint8_t) + \
				sizeof (uintptr_t) + sizeof (kmutex_t))
#define	CPUC_PADSIZE		(CPU_CACHE_COHERENCE_SIZE - CPUC_SIZE)

typedef struct cpu_core {
	uint16_t	cpuc_dtrace_flags;	/* DTrace flags */
	uint8_t		cpuc_dcpc_intr_state;	/* DCPC provider intr state */
	uint8_t		cpuc_pad[CPUC_PADSIZE];	/* padding */
	uintptr_t	cpuc_dtrace_illval;	/* DTrace illegal value */
	kmutex_t	cpuc_pid_lock;		/* DTrace pid provider lock */
} cpu_core_t;

#ifdef _KERNEL
extern cpu_core_t cpu_core[];
#endif /* _KERNEL */

/*
 * CPU_ON_INTR() macro. Returns non-zero if currently on interrupt stack.
 * Note that this isn't a test for a high PIL.  For example, cpu_intr_actv
 * does not get updated when we go through sys_trap from TL>0 at high PIL.
 * getpil() should be used instead to check for PIL levels.
 */
#define	CPU_ON_INTR(cpup) ((cpup)->cpu_intr_actv >> (LOCK_LEVEL + 1))

/*
 * Check to see if an interrupt thread might be active at a given ipl.
 * If so return true.
 * We must be conservative--it is ok to give a false yes, but a false no
 * will cause disaster.  (It is ok if the situation changes after we check;
 * the caller is only trying to ensure that an interrupt routine has
 * exited.)
 * This is used when trying to remove an interrupt handler from an autovector
 * list in avintr.c.
 */
#define	INTR_ACTIVE(cpup, level)	\
	((level) <= LOCK_LEVEL ?	\
	((cpup)->cpu_intr_actv & (1 << (level))) : (CPU_ON_INTR(cpup)))
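
/*
 * Illustrative use of INTR_ACTIVE() when removing an autovector handler:
 * spin until no interrupt thread can still be inside the handler at the
 * given ipl.  This is a sketch of the pattern, not the actual avintr.c
 * code; drv_usecwait() is the DDI busy-wait routine.
 *
 *	while (INTR_ACTIVE(cp, ipl))
 *		drv_usecwait(10);
 */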

/*
 * CPU_PSEUDO_RANDOM() returns a per CPU value that changes each time one
 * looks at it. It's meant as a cheap mechanism to be incorporated in routines
 * wanting to avoid biasing, but where true randomness isn't needed (just
 * something that changes).
 */
#define	CPU_PSEUDO_RANDOM() (CPU->cpu_rotor++)
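
/*
 * For example, a routine distributing work across CPUs might derive an
 * unbiased starting index like this (illustrative only):
 *
 *	start = CPU_PSEUDO_RANDOM() % ncpus;
 */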

#if defined(_KERNEL) || defined(_KMEMUSER) || defined(_BOOT)

#define	INTR_STACK_SIZE	MAX(DEFAULTSTKSZ, PAGESIZE)

/* MEMBERS PROTECTED BY "atomicity": cpu_flags */

/*
 * Flags in the CPU structure.
 *
 * These are protected by cpu_lock (except during creation).
 *
 * Offlined CPUs have three stages of being offline:
 *
 * CPU_ENABLE indicates that the CPU is participating in I/O interrupts
 * that can be directed at a number of different CPUs.  If CPU_ENABLE
 * is off, the CPU will not be given interrupts that can be sent elsewhere,
 * but will still get interrupts from devices associated with that CPU only,
 * and from other CPUs.
 *
 * CPU_OFFLINE indicates that the dispatcher should not allow any threads
 * other than interrupt threads to run on that CPU.  A CPU will not have
 * CPU_OFFLINE set if there are any bound threads (besides interrupts).
 *
 * CPU_QUIESCED is set if p_offline was able to fully idle the CPU and it
 * will not have to run interrupt threads.  In this case the CPU stays in
 * the idle loop until CPU_QUIESCED is cleared.
 *
 * CPU_FROZEN is used only by CPR to mark CPUs that have been successfully
 * suspended (in the suspend path), or have yet to be resumed (in the resume
 * case).
 *
 * CPU_DISABLED is used for disabling SMT. It is similar to CPU_OFFLINE, but
 * cannot be onlined without being forced.
 *
 * On some platforms CPUs can be individually powered off.
 * The following flags are set for powered off CPUs: CPU_QUIESCED,
 * CPU_OFFLINE, and CPU_POWEROFF.  The following flags are cleared:
 * CPU_RUNNING, CPU_READY, CPU_EXISTS, CPU_DISABLED and CPU_ENABLE.
 */
#define	CPU_RUNNING	0x001		/* CPU running */
#define	CPU_READY	0x002		/* CPU ready for cross-calls */
#define	CPU_QUIESCED	0x004		/* CPU will stay in idle */
#define	CPU_EXISTS	0x008		/* CPU is configured */
#define	CPU_ENABLE	0x010		/* CPU enabled for interrupts */
#define	CPU_OFFLINE	0x020		/* CPU offline via p_online */
#define	CPU_POWEROFF	0x040		/* CPU is powered off */
#define	CPU_FROZEN	0x080		/* CPU is frozen via CPR suspend */
#define	CPU_SPARE	0x100		/* CPU offline available for use */
#define	CPU_FAULTED	0x200		/* CPU offline diagnosed faulty */
#define	CPU_DISABLED	0x400		/* CPU explicitly disabled (HT) */

#define	CPU_ACTIVE(cpu)	(((cpu)->cpu_flags & CPU_OFFLINE) == 0)

/*
 * Flags for cpu_offline(), cpu_faulted(), and cpu_spare().
 */
#define	CPU_FORCED	0x0001		/* Force CPU offline */

/*
 * DTrace flags.
 */
#define	CPU_DTRACE_NOFAULT	0x0001	/* Don't fault */
#define	CPU_DTRACE_DROP		0x0002	/* Drop this ECB */
#define	CPU_DTRACE_BADADDR	0x0004	/* DTrace fault: bad address */
#define	CPU_DTRACE_BADALIGN	0x0008	/* DTrace fault: bad alignment */
#define	CPU_DTRACE_DIVZERO	0x0010	/* DTrace fault: divide by zero */
#define	CPU_DTRACE_ILLOP	0x0020	/* DTrace fault: illegal operation */
#define	CPU_DTRACE_NOSCRATCH	0x0040	/* DTrace fault: out of scratch */
#define	CPU_DTRACE_KPRIV	0x0080	/* DTrace fault: bad kernel access */
#define	CPU_DTRACE_UPRIV	0x0100	/* DTrace fault: bad user access */
#define	CPU_DTRACE_TUPOFLOW	0x0200	/* DTrace fault: tuple stack overflow */
#if defined(__sparc)
#define	CPU_DTRACE_FAKERESTORE	0x0400	/* pid provider hint to getreg */
#endif
#define	CPU_DTRACE_ENTRY	0x0800	/* pid provider hint to ustack() */
#define	CPU_DTRACE_BADSTACK	0x1000	/* DTrace fault: bad stack */

#define	CPU_DTRACE_FAULT	(CPU_DTRACE_BADADDR | CPU_DTRACE_BADALIGN | \
				CPU_DTRACE_DIVZERO | CPU_DTRACE_ILLOP | \
				CPU_DTRACE_NOSCRATCH | CPU_DTRACE_KPRIV | \
				CPU_DTRACE_UPRIV | CPU_DTRACE_TUPOFLOW | \
				CPU_DTRACE_BADSTACK)
#define	CPU_DTRACE_ERROR	(CPU_DTRACE_FAULT | CPU_DTRACE_DROP)

/*
 * Dispatcher flags
 * These flags must be changed only by the current CPU.
 */
#define	CPU_DISP_DONTSTEAL	0x01	/* CPU undergoing context swtch */
#define	CPU_DISP_HALTED		0x02	/* CPU halted waiting for interrupt */

/* Note: inside ifdef: _KERNEL || _KMEMUSER || _BOOT */

/*
 * Macros for manipulating sets of CPUs as a bitmap.  Note that this
 * bitmap may vary in size depending on the maximum CPU id a specific
 * platform supports.  This may be different from the number of CPUs
 * the platform supports, since CPU ids can be sparse.  We define two
 * sets of macros; one for platforms where the maximum CPU id is less
 * than the number of bits in a single word (32 in a 32-bit kernel,
 * 64 in a 64-bit kernel), and one for platforms that require bitmaps
 * of more than one word.
 */

#define	CPUSET_WORDS	BT_BITOUL(NCPU)
#define	CPUSET_NOTINSET	((uint_t)-1)

#if defined(_MACHDEP)
struct cpuset {
	ulong_t	cpub[CPUSET_WORDS];
};
#else
struct cpuset;
#endif

typedef struct cpuset cpuset_t;

extern cpuset_t	*cpuset_alloc(int);
extern void	cpuset_free(cpuset_t *);

/*
 * Functions for manipulating cpusets.  These were previously considered
 * private when some cpuset_t handling was performed in the CPUSET_* macros.
 * They are now acceptable to use in non-_MACHDEP code.
 */
extern void	cpuset_all(cpuset_t *);
extern void	cpuset_all_but(cpuset_t *, const uint_t);
extern int	cpuset_isnull(const cpuset_t *);
extern int	cpuset_isequal(const cpuset_t *, const cpuset_t *);
extern void	cpuset_only(cpuset_t *, const uint_t);
extern long	cpu_in_set(const cpuset_t *, const uint_t);
extern void	cpuset_add(cpuset_t *, const uint_t);
extern void	cpuset_del(cpuset_t *, const uint_t);
extern uint_t	cpuset_find(const cpuset_t *);
extern void	cpuset_bounds(const cpuset_t *, uint_t *, uint_t *);
extern void	cpuset_atomic_del(cpuset_t *, const uint_t);
extern void	cpuset_atomic_add(cpuset_t *, const uint_t);
extern long	cpuset_atomic_xadd(cpuset_t *, const uint_t);
extern long	cpuset_atomic_xdel(cpuset_t *, const uint_t);
extern void	cpuset_or(cpuset_t *, const cpuset_t *);
extern void	cpuset_xor(cpuset_t *, const cpuset_t *);
extern void	cpuset_and(cpuset_t *, const cpuset_t *);
extern void	cpuset_zero(cpuset_t *);


#if defined(_MACHDEP)

/*
 * Prior to the cpuset_t restructuring, the CPUSET_* macros contained
 * significant logic, rather than directly invoking the backend functions.
 * They are maintained here so that existing _MACHDEP code can use them.
 */

#define	CPUSET_ALL(set)			cpuset_all(&(set))
#define	CPUSET_ALL_BUT(set, cpu)	cpuset_all_but(&(set), cpu)
#define	CPUSET_ONLY(set, cpu)		cpuset_only(&(set), cpu)
#define	CPU_IN_SET(set, cpu)		cpu_in_set(&(set), cpu)
#define	CPUSET_ADD(set, cpu)		cpuset_add(&(set), cpu)
#define	CPUSET_DEL(set, cpu)		cpuset_del(&(set), cpu)
#define	CPUSET_ISNULL(set)		cpuset_isnull(&(set))
#define	CPUSET_ISEQUAL(set1, set2)	cpuset_isequal(&(set1), &(set2))

/*
 * Find one CPU in the cpuset.
 * Sets "cpu" to the id of the found CPU, or CPUSET_NOTINSET if no cpu
 * could be found. (i.e. empty set)
 */
#define	CPUSET_FIND(set, cpu)		{		\
	cpu = cpuset_find(&(set));			\
}

/*
 * Determine the smallest and largest CPU id in the set. Returns
 * CPUSET_NOTINSET in smallest and largest when set is empty.
 */
#define	CPUSET_BOUNDS(set, smallest, largest)	{		\
	cpuset_bounds(&(set), &(smallest), &(largest));		\
}

/*
 * Atomic cpuset operations
 * These are safe to use for concurrent cpuset manipulations.
 * "xdel" and "xadd" are exclusive operations, that set "result" to "0"
 * if the add or del was successful, or "-1" if not successful.
 * (e.g. attempting to add a cpu to a cpuset that's already there, or
 * deleting a cpu that's not in the cpuset)
 */

#define	CPUSET_ATOMIC_DEL(set, cpu)	cpuset_atomic_del(&(set), cpu)
#define	CPUSET_ATOMIC_ADD(set, cpu)	cpuset_atomic_add(&(set), cpu)

#define	CPUSET_ATOMIC_XADD(set, cpu, result)	\
	(result) = cpuset_atomic_xadd(&(set), cpu)

#define	CPUSET_ATOMIC_XDEL(set, cpu, result)	\
	(result) = cpuset_atomic_xdel(&(set), cpu)

#define	CPUSET_OR(set1, set2)	cpuset_or(&(set1), &(set2))

#define	CPUSET_XOR(set1, set2)	cpuset_xor(&(set1), &(set2))

#define	CPUSET_AND(set1, set2)	cpuset_and(&(set1), &(set2))

#define	CPUSET_ZERO(set)	cpuset_zero(&(set))

#endif /* defined(_MACHDEP) */


extern cpuset_t cpu_seqid_inuse;

extern struct cpu	*cpu[];		/* indexed by CPU number */
extern struct cpu	**cpu_seq;	/* indexed by sequential CPU id */
extern cpu_t		*cpu_list;	/* list of CPUs */
extern cpu_t		*cpu_active;	/* list of active CPUs */
extern cpuset_t		cpu_active_set;	/* cached set of active CPUs */
extern cpuset_t		cpu_available;	/* cached set of available CPUs */
extern int		ncpus;		/* number of CPUs present */
extern int		ncpus_online;	/* number of CPUs not quiesced */
extern int		ncpus_intr_enabled; /* nr of CPUs taking I/O intrs */
extern int		max_ncpus;	/* max present before ncpus is known */
extern int		boot_max_ncpus;	/* like max_ncpus but for real */
extern int		boot_ncpus;	/* # cpus present @ boot */
extern processorid_t	max_cpuid;	/* maximum CPU number */
extern struct cpu	*cpu_inmotion;	/* offline or partition move target */
extern cpu_t		*clock_cpu_list;
extern processorid_t	max_cpu_seqid_ever;	/* maximum seqid ever given */

#if defined(__i386) || defined(__amd64)
extern struct cpu *curcpup(void);
#define	CPU		(curcpup())	/* Pointer to current CPU */
#else
#define	CPU		(curthread->t_cpu)	/* Pointer to current CPU */
#endif

/*
 * CPU_CURRENT indicates to thread_affinity_set() to use whatever curthread's
 * current CPU is; holding cpu_lock is not required.
 */
#define	CPU_CURRENT	-3

/*
 * CPU_BEST can be used by thread_affinity_set() callers to set affinity to a
 * good CPU (in particular, an ht_acquire()-friendly choice); holding cpu_lock
 * is not required.
 */
#define	CPU_BEST	-4
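
/*
 * For instance, a thread that must briefly stay on one CPU without holding
 * cpu_lock might do the following (illustrative sketch):
 *
 *	thread_affinity_set(curthread, CPU_CURRENT);
 *	... per-CPU work ...
 *	thread_affinity_clear(curthread);
 */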

/*
 * Per-CPU statistics
 *
 * cpu_stats_t contains numerous system and VM-related statistics, in the form
 * of gauges or monotonically-increasing event occurrence counts.
 */

#define	CPU_STATS_ENTER_K()	kpreempt_disable()
#define	CPU_STATS_EXIT_K()	kpreempt_enable()

#define	CPU_STATS_ADD_K(class, stat, amount) \
	{	kpreempt_disable(); /* keep from switching CPUs */\
		CPU_STATS_ADDQ(CPU, class, stat, amount); \
		kpreempt_enable(); \
	}

#define	CPU_STATS_ADDQ(cp, class, stat, amount)	{			\
	extern void __dtrace_probe___cpu_##class##info_##stat(uint_t,	\
	    uint64_t *, cpu_t *);					\
	uint64_t *stataddr = &((cp)->cpu_stats.class.stat);		\
	__dtrace_probe___cpu_##class##info_##stat((amount),		\
	    stataddr, cp);						\
	*(stataddr) += (amount);					\
}

#define	CPU_STATS(cp, stat)                                       \
	((cp)->cpu_stats.stat)
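
/*
 * Typical usage (illustrative; "sys" is the system-statistics class and
 * "xcalls" one of its counters, both from the cpu_stats_t definition in
 * <sys/sysinfo.h>):
 *
 *	CPU_STATS_ADD_K(sys, xcalls, 1);
 *	total = CPU_STATS(cp, sys.xcalls);
 */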

/*
 * Increment CPU generation value.
 * This macro should be called whenever CPU goes on-line or off-line.
 * Updates to cpu_generation should be protected by cpu_lock.
 */
#define	CPU_NEW_GENERATION(cp)	((cp)->cpu_generation++)
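
/*
 * A caller that must drop cpu_lock can detect intervening on-line/off-line
 * transitions by snapshotting the generation (illustrative sketch):
 *
 *	uint_t gen = cp->cpu_generation;
 *
 *	mutex_exit(&cpu_lock);
 *	... lengthy work ...
 *	mutex_enter(&cpu_lock);
 *	if (cp->cpu_generation != gen)
 *		... re-validate the CPU's state ...
 */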

#endif /* defined(_KERNEL) || defined(_KMEMUSER) || defined(_BOOT) */

#define	CPU_CPR_OFFLINE		0x0
#define	CPU_CPR_ONLINE		0x1
#define	CPU_CPR_IS_OFFLINE(cpu)	(((cpu)->cpu_cpr_flags & CPU_CPR_ONLINE) == 0)
#define	CPU_CPR_IS_ONLINE(cpu)	((cpu)->cpu_cpr_flags & CPU_CPR_ONLINE)
#define	CPU_SET_CPR_FLAGS(cpu, flag)	((cpu)->cpu_cpr_flags |= flag)

/*
 * CPU support routines (not for genassym.c)
 */
#if	(defined(_KERNEL) || defined(_FAKE_KERNEL)) && defined(__STDC__)

struct zone;

void	cpu_list_init(cpu_t *);
void	cpu_add_unit(cpu_t *);
void	cpu_del_unit(int cpuid);
void	cpu_add_active(cpu_t *);
void	cpu_kstat_init(cpu_t *);
void	cpu_visibility_add(cpu_t *, struct zone *);
void	cpu_visibility_remove(cpu_t *, struct zone *);
void	cpu_visibility_configure(cpu_t *, struct zone *);
void	cpu_visibility_unconfigure(cpu_t *, struct zone *);
void	cpu_visibility_online(cpu_t *, struct zone *);
void	cpu_visibility_offline(cpu_t *, struct zone *);
void	cpu_create_intrstat(cpu_t *);
void	cpu_delete_intrstat(cpu_t *);
int	cpu_kstat_intrstat_update(kstat_t *, int);
void	cpu_intr_swtch_enter(kthread_t *);
void	cpu_intr_swtch_exit(kthread_t *);

void	mbox_lock_init(void);	 /* initialize cross-call locks */
void	mbox_init(int cpun);	 /* initialize cross-calls */
void	poke_cpu(int cpun);	 /* interrupt another CPU (to preempt) */

/*
 * values for safe_list.  Pause state that CPUs are in.
 */
#define	PAUSE_IDLE	0		/* normal state */
#define	PAUSE_READY	1		/* paused thread ready to spl */
#define	PAUSE_WAIT	2		/* paused thread is spl-ed high */
#define	PAUSE_DIE	3		/* tell pause thread to leave */
#define	PAUSE_DEAD	4		/* pause thread has left */

void	mach_cpu_pause(volatile char *);

void	pause_cpus(cpu_t *off_cp, void *(*func)(void *));
void	start_cpus(void);
int	cpus_paused(void);
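
/*
 * A minimal sketch of the pause protocol (illustrative; passing NULL/NULL
 * pauses every other CPU and runs no callback, and cpu_lock must be held):
 *
 *	mutex_enter(&cpu_lock);
 *	pause_cpus(NULL, NULL);
 *	... update state that no other CPU may observe mid-change ...
 *	start_cpus();
 *	mutex_exit(&cpu_lock);
 */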

void	cpu_pause_init(void);
cpu_t	*cpu_get(processorid_t cpun);	/* get the CPU struct associated */

int	cpu_online(cpu_t *, int);	/* take cpu online */
int	cpu_offline(cpu_t *, int);	/* take cpu offline */
int	cpu_spare(cpu_t *, int);	/* take cpu to spare */
int	cpu_faulted(cpu_t *, int);	/* take cpu to faulted */
int	cpu_poweron(cpu_t *);		/* take powered-off cpu to offline */
int	cpu_poweroff(cpu_t *);		/* take offline cpu to powered-off */

cpu_t	*cpu_intr_next(cpu_t *cp);	/* get next online CPU taking intrs */
int	cpu_intr_count(cpu_t *cp);	/* count # of CPUs handling intrs */
int	cpu_intr_on(cpu_t *cp);		/* CPU taking I/O interrupts? */
void	cpu_intr_enable(cpu_t *cp);	/* enable I/O interrupts */
int	cpu_intr_disable(cpu_t *cp);	/* disable I/O interrupts */
void	cpu_intr_alloc(cpu_t *cp, int n); /* allocate interrupt threads */

/*
 * Routines for checking CPU states.
 */
int	cpu_is_online(cpu_t *);		/* check if CPU is online */
int	cpu_is_nointr(cpu_t *);		/* check if CPU can service intrs */
int	cpu_is_active(cpu_t *);		/* check if CPU can run threads */
int	cpu_is_offline(cpu_t *);	/* check if CPU is offline */
int	cpu_is_poweredoff(cpu_t *);	/* check if CPU is powered off */

int	cpu_flagged_online(cpu_flag_t);	/* flags show CPU is online */
int	cpu_flagged_nointr(cpu_flag_t);	/* flags show CPU not handling intrs */
int	cpu_flagged_active(cpu_flag_t); /* flags show CPU scheduling threads */
int	cpu_flagged_offline(cpu_flag_t); /* flags show CPU is offline */
int	cpu_flagged_poweredoff(cpu_flag_t); /* flags show CPU is powered off */

/*
 * The processor_info(2) state of a CPU is a simplified representation suitable
 * for use by an application program.  Kernel subsystems should utilize the
 * internal per-CPU state as given by the cpu_flags member of the cpu structure,
 * as this information may include platform- or architecture-specific state
 * critical to a subsystem's disposition of a particular CPU.
 */
void	cpu_set_state(cpu_t *);		/* record/timestamp current state */
int	cpu_get_state(cpu_t *);		/* get current cpu state */
const char *cpu_get_state_str(cpu_flag_t);


void	cpu_set_curr_clock(uint64_t);	/* indicate the current CPU's freq */
void	cpu_set_supp_freqs(cpu_t *, const char *); /* set the CPU supported */
						/* frequencies */

int	cpu_configure(int);
int	cpu_unconfigure(int);
void	cpu_destroy_bound_threads(cpu_t *cp);

extern int cpu_bind_thread(kthread_t *tp, processorid_t bind,
    processorid_t *obind, int *error);
extern int cpu_unbind(processorid_t cpu_id, boolean_t force);
extern void thread_affinity_set(kthread_t *t, int cpu_id);
extern void thread_affinity_clear(kthread_t *t);
extern void affinity_set(int cpu_id);
extern void affinity_clear(void);
extern void init_cpu_mstate(struct cpu *, int);
extern void term_cpu_mstate(struct cpu *);
extern void new_cpu_mstate(int, hrtime_t);
extern void get_cpu_mstate(struct cpu *, hrtime_t *);
extern void thread_nomigrate(void);
extern void thread_allowmigrate(void);
extern void weakbinding_stop(void);
extern void weakbinding_start(void);

/*
 * The following routines affect a CPU's participation in interrupt
 * processing, if that is applicable on the architecture.  This only affects
 * interrupts that aren't directed at the processor (not cross calls).
 *
 * cpu_disable_intr returns non-zero if interrupts were previously enabled.
 */
int	cpu_disable_intr(struct cpu *cp); /* stop issuing interrupts to cpu */
void	cpu_enable_intr(struct cpu *cp); /* start issuing interrupts to cpu */

/*
 * The mutex cpu_lock protects cpu_flags for all CPUs, as well as the ncpus
 * and ncpus_online counts.
 */
extern kmutex_t	cpu_lock;	/* lock protecting CPU data */

/*
 * CPU state change events
 *
 * Various subsystems need to know when CPUs change their state. They get this
 * information by registering CPU state change callbacks using
 * register_cpu_setup_func(). Whenever any CPU changes its state, the callback
 * function is called. The callback function is passed three arguments:
 *
 *   Event, described by cpu_setup_t
 *   CPU ID
 *   Transparent pointer passed when registering the callback
 *
 * The callback function is called with cpu_lock held. The return value from the
 * callback function is usually ignored, except for CPU_CONFIG and CPU_UNCONFIG
 * events.  For these two events, a non-zero return value indicates a
 * failure and prevents successful completion of the operation.
 *
 * New events may be added in the future. Callback functions should ignore any
 * events that they do not understand.
 *
 * The following events provide notification callbacks:
 *
 *  CPU_INIT	A new CPU is started and added to the list of active CPUs
 *		  This event is only used during boot
 *
 *  CPU_CONFIG	A newly inserted CPU is prepared to start running code
 *		  This event is called by DR code
 *
 *  CPU_UNCONFIG CPU has been powered off and needs cleanup
 *		  This event is called by DR code
 *
 *  CPU_ON	CPU is enabled but does not run anything yet
 *
 *  CPU_INTR_ON	CPU is enabled and has interrupts enabled
 *
 *  CPU_OFF	CPU is going offline but can still run threads
 *
 *  CPU_CPUPART_OUT	CPU is going to move out of its partition
 *
 *  CPU_CPUPART_IN	CPU is going to move to a new partition
 *
 *  CPU_SETUP	CPU is set up during boot and can run threads
 */
typedef enum {
	CPU_INIT,
	CPU_CONFIG,
	CPU_UNCONFIG,
	CPU_ON,
	CPU_OFF,
	CPU_CPUPART_IN,
	CPU_CPUPART_OUT,
	CPU_SETUP,
	CPU_INTR_ON
} cpu_setup_t;

typedef int cpu_setup_func_t(cpu_setup_t, int, void *);

/*
 * Routines used to register interest in cpu's being added to or removed
 * from the system.
 */
extern void register_cpu_setup_func(cpu_setup_func_t *, void *);
extern void unregister_cpu_setup_func(cpu_setup_func_t *, void *);
extern void cpu_state_change_notify(int, cpu_setup_t);
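
/*
 * An illustrative registration sketch.  Per the protocol above, the
 * callback runs with cpu_lock held, must ignore events it does not
 * understand, and only its CPU_CONFIG/CPU_UNCONFIG return values are
 * examined.  prepare_percpu_state() and teardown_percpu_state() are
 * hypothetical helpers.
 *
 *	static int
 *	my_cpu_setup(cpu_setup_t what, int id, void *arg)
 *	{
 *		ASSERT(MUTEX_HELD(&cpu_lock));
 *
 *		switch (what) {
 *		case CPU_CONFIG:
 *			return (prepare_percpu_state(id, arg));
 *		case CPU_UNCONFIG:
 *			return (teardown_percpu_state(id, arg));
 *		default:
 *			return (0);
 *		}
 *	}
 *
 *	mutex_enter(&cpu_lock);
 *	register_cpu_setup_func(my_cpu_setup, my_arg);
 *	mutex_exit(&cpu_lock);
 */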

/*
 * Call specified function on the given CPU
 */
typedef void (*cpu_call_func_t)(uintptr_t, uintptr_t);
extern void cpu_call(cpu_t *, cpu_call_func_t, uintptr_t, uintptr_t);
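
/*
 * Illustrative usage (set_feature() is a hypothetical handler matching
 * cpu_call_func_t; cpu_call() arranges for it to run on the target CPU):
 *
 *	static void
 *	set_feature(uintptr_t a1, uintptr_t a2)
 *	{
 *		...
 *	}
 *
 *	cpu_call(cp, set_feature, (uintptr_t)onoff, 0);
 */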


/*
 * Create various strings that describe the given CPU for the
 * processor_info system call and configuration-related kstats.
 */
#define	CPU_IDSTRLEN	100

extern void init_cpu_info(struct cpu *);
extern void populate_idstr(struct cpu *);
extern void cpu_vm_data_init(struct cpu *);
extern void cpu_vm_data_destroy(struct cpu *);

#endif	/* _KERNEL || _FAKE_KERNEL */

#ifdef	__cplusplus
}
#endif

#endif /* _SYS_CPUVAR_H */