summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/sys/zone.h
blob: 08677a2f651f9d9aabff93b2fb47e07e1c33a04f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, Joyent Inc. All rights reserved.
 */

#ifndef _SYS_ZONE_H
#define	_SYS_ZONE_H

#include <sys/types.h>
#include <sys/mutex.h>
#include <sys/param.h>
#include <sys/rctl.h>
#include <sys/ipc_rctl.h>
#include <sys/pset.h>
#include <sys/tsol/label.h>
#include <sys/cred.h>
#include <sys/netstack.h>
#include <sys/uadmin.h>
#include <sys/ksynch.h>
#include <sys/socket_impl.h>
#include <netinet/in.h>

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * NOTE
 *
 * The contents of this file are private to the implementation of
 * Solaris and are subject to change at any time without notice.
 * Applications and drivers using these interfaces may fail to
 * run on future releases.
 */

/* Available both in kernel and for user space */

/*
 * Zone ID restrictions and special IDs.
 *
 * MIN_ZONEID and GLOBAL_ZONEID are both 0, while the lowest ID a user can
 * create is 1.  ZONEID_WIDTH is the number of decimal digits in MAX_ZONEID.
 */
#define	MAX_ZONEID	9999
#define	MIN_USERZONEID	1	/* lowest user-creatable zone ID */
#define	MIN_ZONEID	0	/* minimum zone ID on system */
#define	GLOBAL_ZONEID	0
#define	ZONEID_WIDTH	4	/* for printf */

/*
 * Special zoneid_t token to refer to all zones.  It is negative, so it can
 * never collide with an ID in the MIN_ZONEID..MAX_ZONEID range.
 */
#define	ALL_ZONES	(-1)

/*
 * System call subcodes.
 *
 * NOTE(review): presumably these select the operation performed by the
 * zone() system call through its leading int argument -- confirm against
 * the syscall implementation.
 */
#define	ZONE_CREATE		0
#define	ZONE_DESTROY		1
#define	ZONE_GETATTR		2
#define	ZONE_ENTER		3
#define	ZONE_LIST		4
#define	ZONE_SHUTDOWN		5
#define	ZONE_LOOKUP		6
#define	ZONE_BOOT		7
#define	ZONE_VERSION		8
#define	ZONE_SETATTR		9
#define	ZONE_ADD_DATALINK	10
#define	ZONE_DEL_DATALINK	11
#define	ZONE_CHECK_DATALINK	12
#define	ZONE_LIST_DATALINK	13

/*
 * Zone attributes.
 *
 * NOTE(review): presumably these are the attribute selectors used with the
 * ZONE_GETATTR and ZONE_SETATTR subcodes -- confirm against the syscall
 * implementation.  Numbering starts at 1; 0 is not assigned here.
 */
#define	ZONE_ATTR_ROOT		1
#define	ZONE_ATTR_NAME		2
#define	ZONE_ATTR_STATUS	3
#define	ZONE_ATTR_PRIVSET	4
#define	ZONE_ATTR_UNIQID	5
#define	ZONE_ATTR_POOLID	6
#define	ZONE_ATTR_INITPID	7
#define	ZONE_ATTR_SLBL		8
#define	ZONE_ATTR_INITNAME	9
#define	ZONE_ATTR_BOOTARGS	10
#define	ZONE_ATTR_BRAND		11
#define	ZONE_ATTR_PMCAP_NOVER	12
#define	ZONE_ATTR_SCHED_CLASS	13
#define	ZONE_ATTR_FLAGS		14
#define	ZONE_ATTR_HOSTID	15
#define	ZONE_ATTR_FS_ALLOWED	16
#define	ZONE_ATTR_NETWORK	17
#define	ZONE_ATTR_DID		18
#define	ZONE_ATTR_PMCAP_PAGEOUT	19
#define	ZONE_ATTR_INITNORESTART	20

/*
 * Start of the brand-specific attribute namespace.  Attribute numbers at or
 * above this value are left to brands; the generic attributes above must
 * stay below it.
 */
#define	ZONE_ATTR_BRAND_ATTRS	32768

/*
 * NOTE(review): presumably the maximum size of the value carried by
 * ZONE_ATTR_FS_ALLOWED -- confirm against the users of this constant.
 */
#define	ZONE_FS_ALLOWED_MAX	1024

/*
 * Names used for zone status-change events: the event channel, and the
 * class and subclass of the events.
 */
#define	ZONE_EVENT_CHANNEL	"com.sun:zones:status"
#define	ZONE_EVENT_STATUS_CLASS	"status"
#define	ZONE_EVENT_STATUS_SUBCLASS	"change"

/*
 * State names carried in status events.  Only five names are defined here,
 * which is fewer than the number of zone_status_t values.
 */
#define	ZONE_EVENT_UNINITIALIZED	"uninitialized"
#define	ZONE_EVENT_INITIALIZED		"initialized"
#define	ZONE_EVENT_READY		"ready"
#define	ZONE_EVENT_RUNNING		"running"
#define	ZONE_EVENT_SHUTTING_DOWN	"shutting_down"

/*
 * NOTE(review): these look like the names of the attributes attached to
 * each status-change event (zone name, new and old state, timestamp and
 * zone ID) -- confirm against the code that builds the event.
 */
#define	ZONE_CB_NAME		"zonename"
#define	ZONE_CB_NEWSTATE	"newstate"
#define	ZONE_CB_OLDSTATE	"oldstate"
#define	ZONE_CB_TIMESTAMP	"when"
#define	ZONE_CB_ZONEID		"zoneid"

/*
 * Exit values that may be returned by scripts or programs invoked by various
 * zone commands.
 *
 * These are defined as:
 *
 *	ZONE_SUBPROC_OK
 *	===============
 *	The subprocess completed successfully.
 *
 *	ZONE_SUBPROC_USAGE
 *	==================
 *	The subprocess failed with a usage message, or a usage message should
 *	be output on its behalf.
 *
 *	ZONE_SUBPROC_NOTCOMPLETE
 *	========================
 *	The subprocess did not complete, but the actions performed by the
 *	subprocess require no recovery actions by the user.
 *
 *	For example, if the subprocess were called by "zoneadm install," the
 *	installation of the zone did not succeed but the user need not perform
 *	a "zoneadm uninstall" before attempting another install.
 *
 *	ZONE_SUBPROC_FATAL
 *	==================
 *	The subprocess failed in a fatal manner, usually one that will require
 *	some type of recovery action by the user.
 *
 *	For example, if the subprocess were called by "zoneadm install," the
 *	installation of the zone did not succeed and the user will need to
 *	perform a "zoneadm uninstall" before another install attempt is
 *	possible.
 *
 *	The non-success exit values are large to avoid accidental collision
 *	with values used internally by some commands (e.g. "Z_ERR" and
 *	"Z_USAGE" as used by zoneadm.)
 */
#define	ZONE_SUBPROC_OK			0	/* completed successfully */
#define	ZONE_SUBPROC_USAGE		253	/* usage error */
#define	ZONE_SUBPROC_NOTCOMPLETE	254	/* failed; no recovery needed */
#define	ZONE_SUBPROC_FATAL		255	/* fatal; may need recovery */

/*
 * Description of a zone: name, root path, privilege set, rctl buffer, ZFS
 * dataset buffer, label information, flags and requested zone ID.
 * NOTE(review): presumably this is the argument block of the ZONE_CREATE
 * subcode -- confirm against the syscall implementation.
 *
 * zone_def32 lists the same members as zone_def, in the same order, with
 * pointers and sizes expressed as caddr32_t/size32_t.  It is only compiled
 * when _SYSCALL32 is defined.  Keep the two member lists in step.
 */
#ifdef _SYSCALL32
typedef struct {
	caddr32_t zone_name;
	caddr32_t zone_root;
	caddr32_t zone_privs;
	size32_t zone_privssz;
	caddr32_t rctlbuf;
	size32_t rctlbufsz;
	caddr32_t extended_error;
	caddr32_t zfsbuf;
	size32_t  zfsbufsz;
	int match;			/* match level */
	uint32_t doi;			/* DOI for label */
	caddr32_t label;		/* label associated with zone */
	int flags;
	zoneid_t zoneid;		/* requested zoneid */
} zone_def32;
#endif
/* Native-width form; see the comment above zone_def32. */
typedef struct {
	const char *zone_name;
	const char *zone_root;
	const struct priv_set *zone_privs;
	size_t zone_privssz;
	const char *rctlbuf;
	size_t rctlbufsz;
	int *extended_error;		/* the only non-const pointer member */
	const char *zfsbuf;
	size_t zfsbufsz;
	int match;			/* match level */
	uint32_t doi;			/* DOI for label */
	const bslabel_t *label;		/* label associated with zone */
	int flags;
	zoneid_t zoneid;		/* requested zoneid */
} zone_def;

/*
 * Extended error information.
 * NOTE(review): presumably one of these is stored through the
 * extended_error pointer of zone_def when zone creation fails -- confirm.
 */
#define	ZE_UNKNOWN	0	/* No extended error info */
#define	ZE_CHROOTED	1	/* tried to zone_create from chroot */
#define	ZE_AREMOUNTS	2	/* there are mounts within the zone */
#define	ZE_LABELINUSE	3	/* label is already in use by some other zone */

/*
 * zone_status values, in ascending numeric order.
 *
 * Every enumerator carries its value explicitly so that the numbering is
 * visible at a glance.  The values are consecutive, starting at
 * ZONE_MIN_STATE and ending at ZONE_MAX_STATE.
 *
 * You must modify zone_status_names in mdb(1M)'s genunix module
 * (genunix/zone.c) when you modify this enum.
 */
typedef enum {
	ZONE_IS_UNINITIALIZED	= 0,
	ZONE_IS_INITIALIZED	= 1,
	ZONE_IS_READY		= 2,
	ZONE_IS_BOOTING		= 3,
	ZONE_IS_RUNNING		= 4,
	ZONE_IS_SHUTTING_DOWN	= 5,
	ZONE_IS_EMPTY		= 6,
	ZONE_IS_DOWN		= 7,
	ZONE_IS_DYING		= 8,
	ZONE_IS_DEAD		= 9
} zone_status_t;
#define	ZONE_MIN_STATE		ZONE_IS_UNINITIALIZED
#define	ZONE_MAX_STATE		ZONE_IS_DEAD

/*
 * Valid commands which may be issued by zoneadm to zoneadmd.  The kernel also
 * communicates with zoneadmd, but only uses Z_REBOOT and Z_HALT.
 *
 * The values are written out explicitly, one command per line, so that the
 * number each command has in a request is visible here.
 */
typedef enum zone_cmd {
	Z_READY			= 0,
	Z_BOOT			= 1,
	Z_FORCEBOOT		= 2,
	Z_REBOOT		= 3,
	Z_HALT			= 4,
	Z_NOTE_UNINSTALLING	= 5,
	Z_MOUNT			= 6,
	Z_FORCEMOUNT		= 7,
	Z_UNMOUNT		= 8
} zone_cmd_t;

/*
 * The structure of a request to zoneadmd.
 *
 * locale and bootbuf are fixed-size arrays embedded in the request, so the
 * structure contains no pointers.
 */
typedef struct zone_cmd_arg {
	uint64_t	uniqid;		/* unique "generation number" */
	zone_cmd_t	cmd;		/* requested action */
	uint32_t debug;			/* enable brand hook debug */
	char locale[MAXPATHLEN];	/* locale in which to render messages */
	char bootbuf[BOOTARGS_MAX];	/* arguments passed to zone_boot() */
} zone_cmd_arg_t;

/*
 * Structure of zoneadmd's response to a request.  A NULL return value means
 * the caller should attempt to restart zoneadmd and retry.
 *
 * errbuf is declared with a single element but holds a variable-sized
 * message, so sizeof (zone_cmd_rval_t) does not describe the size of a
 * response.  NOTE(review): presumably responses are allocated larger than
 * the structure to make room for the text -- confirm against zoneadmd.
 */
typedef struct zone_cmd_rval {
	int rval;			/* return value of request */
	char errbuf[1];	/* variable-sized buffer containing error messages */
} zone_cmd_rval_t;

/*
 * The zone support infrastructure uses the zone name as a component
 * of unix domain (AF_UNIX) sockets, which are limited to 108 characters
 * in length, so ZONENAME_MAX is limited by that.
 */
#define	ZONENAME_MAX		64

#define	GLOBAL_ZONENAME		"global"

/*
 * Extended Regular expression (see regex(5)) which matches all valid zone
 * names.
 *
 * The expression accepts one leading alphanumeric character followed by up
 * to 62 further characters, i.e. names of 1 to 63 characters, which is
 * ZONENAME_MAX - 1.  If ZONENAME_MAX changes, the repeat count here has to
 * change with it.
 */
#define	ZONENAME_REGEXP		"[a-zA-Z0-9][-_.a-zA-Z0-9]{0,62}"

/*
 * Where the zones support infrastructure places temporary files.
 */
#define	ZONES_TMPDIR		"/var/run/zones"

/*
 * The path to the door used by clients to communicate with zoneadmd.
 *
 * This is a printf-style format built by string-literal concatenation with
 * ZONES_TMPDIR; it contains a single %s conversion.
 * NOTE(review): presumably the %s is filled with the zone name -- confirm.
 */
#define	ZONE_DOOR_PATH		ZONES_TMPDIR "/%s.zoneadmd_door"


/* zone_flags */
/*
 * Threads that read or write the following flag must hold zone_lock.
 */
#define	ZF_REFCOUNTS_LOGGED	0x1	/* a thread logged the zone's refs */

/*
 * The following flags are set when the zone is created and never changed.
 * Threads that test for these flags don't have to hold zone_lock.
 */
#define	ZF_HASHED_LABEL		0x2	/* zone has a unique label */
#define	ZF_IS_SCRATCH		0x4	/* scratch zone */
#define	ZF_NET_EXCL		0x8	/* Zone has an exclusive IP stack */


/*
 * zone_create flags
 *
 * These form a namespace separate from the ZF_* flags above: ZCF_NET_EXCL
 * (0x1) and ZF_NET_EXCL (0x8) have different values and must not be mixed.
 */
#define	ZCF_NET_EXCL		0x1	/* Create a zone with exclusive IP */

/* zone network properties */
#define	ZONE_NETWORK_ADDRESS	1
#define	ZONE_NETWORK_DEFROUTER	2

#define	ZONE_NET_ADDRNAME	"address"
#define	ZONE_NET_RTRNAME	"route"

/*
 * A network property value associated with a datalink.
 *
 * zn_val is declared with a single element; NOTE(review): presumably it is
 * a variable-length payload of zn_len bytes and zn_type is one of the
 * ZONE_NETWORK_* values above -- confirm against the users of this
 * structure.
 */
typedef struct zone_net_data {
	int zn_type;
	int zn_len;
	datalink_id_t zn_linkid;
	uint8_t zn_val[1];
} zone_net_data_t;


#ifdef _KERNEL

/*
 * We need to protect the definition of 'list_t' from userland applications and
 * libraries which may be defining their own versions.
 */
#include <sys/list.h>
#include <sys/cpuvar.h>

#define	GLOBAL_ZONEUNIQID	0	/* uniqid of the global zone */

/*
 * Forward declarations: this header only stores pointers to these types
 * (zone_pool and zone_brand in zone_t), so their definitions are not needed.
 */
struct pool;
struct brand;

/*
 * Each of these constants identifies a kernel subsystem that acquires and
 * releases zone references.  Each subsystem that invokes
 * zone_hold_ref() and zone_rele_ref() should specify the
 * zone_ref_subsys_t constant associated with the subsystem.  Tracked holds
 * help users and developers quickly identify subsystems that stall zone
 * shutdowns indefinitely.
 *
 * The enumerators are deliberately left without explicit values:
 * ZONE_REF_NUM_SUBSYS relies on being last to equal the number of
 * subsystems, and it sizes the zone_subsys_ref[] array in zone_t.
 *
 * NOTE: You must modify zone_ref_subsys_names in usr/src/uts/common/os/zone.c
 * when you modify this enumeration.
 */
typedef enum zone_ref_subsys {
	ZONE_REF_NFS,			/* NFS */
	ZONE_REF_NFSV4,			/* NFSv4 */
	ZONE_REF_SMBFS,			/* SMBFS */
	ZONE_REF_MNTFS,			/* MNTFS */
	ZONE_REF_LOFI,			/* LOFI devices */
	ZONE_REF_VFS,			/* VFS infrastructure */
	ZONE_REF_IPC,			/* IPC infrastructure */
	ZONE_REF_NUM_SUBSYS		/* This must be the last entry. */
} zone_ref_subsys_t;

/*
 * zone_ref represents a general-purpose reference to a zone.  Each zone's
 * references are linked into the zone's zone_t::zone_ref_list.  This allows
 * debuggers to walk zones' references.
 */
typedef struct zone_ref {
	struct zone	*zref_zone; /* the zone to which the reference refers */
	list_node_t	zref_linkage; /* linkage for zone_t::zone_ref_list */
} zone_ref_t;

/*
 * Structure to record list of ZFS datasets exported to a zone.  One of these
 * exists per dataset; they are kept on zone_t::zone_datasets.
 */
typedef struct zone_dataset {
	char		*zd_dataset;	/* dataset name */
	list_node_t	zd_linkage;	/* linkage for zone_t::zone_datasets */
} zone_dataset_t;

/*
 * structure for rctl zone kstats
 *
 * NOTE(review): presumably zk_usage is the current consumption and
 * zk_value the configured limit of the rctl -- confirm against the code
 * that fills these in.
 */
typedef struct zone_kstat {
	kstat_named_t zk_zonename;
	kstat_named_t zk_usage;
	kstat_named_t zk_value;
} zone_kstat_t;

/* Forward declaration; zone_t only holds a pointer to it (zone_cpucap). */
struct cpucap;

/*
 * Per-zone I/O counter.  zone_t embeds three of these (zone_rd_ops,
 * zone_wr_ops and zone_lwr_ops) for ZFS reads, writes and logical writes.
 *
 * NOTE(review): presumably cycle_start is the start time of the current
 * counting window, cycle_cnt the operations seen in it and zone_avg_cnt a
 * running average -- confirm against the code that updates them.
 */
typedef struct {
	hrtime_t	cycle_start;
	uint_t		cycle_cnt;
	hrtime_t	zone_avg_cnt;
} sys_zio_cntr_t;

/*
 * Named kstats for per-zone VFS operations and bytes; zone_t points at one
 * of these through zone_vfs_stats.
 *
 * NOTE(review): the zv_10ms_ops/zv_100ms_ops/zv_1s_ops members look like
 * counts of operations bucketed by latency -- confirm against the code
 * that updates them.
 */
typedef struct {
	kstat_named_t	zv_zonename;
	kstat_named_t	zv_nread;
	kstat_named_t	zv_reads;
	kstat_named_t	zv_rtime;
	kstat_named_t	zv_rlentime;
	kstat_named_t	zv_nwritten;
	kstat_named_t	zv_writes;
	kstat_named_t	zv_wtime;
	kstat_named_t	zv_wlentime;
	kstat_named_t	zv_10ms_ops;
	kstat_named_t	zv_100ms_ops;
	kstat_named_t	zv_1s_ops;
	kstat_named_t 	zv_delay_cnt;
	kstat_named_t	zv_delay_time;
} zone_vfs_kstat_t;

/*
 * Named kstats for per-zone ZFS I/O operations and bytes; zone_t points at
 * one of these through zone_zfs_stats.
 */
typedef struct {
	kstat_named_t	zz_zonename;
	kstat_named_t	zz_nread;
	kstat_named_t	zz_reads;
	kstat_named_t	zz_rtime;
	kstat_named_t	zz_rlentime;
	kstat_named_t	zz_nwritten;
	kstat_named_t	zz_writes;
	kstat_named_t	zz_waittime;
} zone_zfs_kstat_t;

/*
 * Named kstats for per-zone physical memory capping; zone_t points at one
 * of these through zone_mcap_stats.
 *
 * The members use the same zm_ prefix as zone_misc_kstat_t, so the prefix
 * alone does not identify which structure a member belongs to.
 */
typedef struct {
	kstat_named_t	zm_zonename;
	kstat_named_t	zm_rss;
	kstat_named_t	zm_phys_cap;
	kstat_named_t	zm_swap;
	kstat_named_t	zm_swap_cap;
	kstat_named_t	zm_nover;
	kstat_named_t	zm_pagedout;
	kstat_named_t	zm_pgpgin;
	kstat_named_t	zm_anonpgin;
	kstat_named_t	zm_execpgin;
	kstat_named_t	zm_fspgin;
	kstat_named_t	zm_anon_alloc_fail;
} zone_mcap_kstat_t;

/*
 * Named kstats for per-zone CPU usage and load averages; zone_t points at
 * one of these through zone_misc_stats.
 *
 * The members use the same zm_ prefix as zone_mcap_kstat_t, so the prefix
 * alone does not identify which structure a member belongs to.
 */
typedef struct {
	kstat_named_t	zm_zonename;	/* full name, kstat truncates name */
	kstat_named_t	zm_utime;
	kstat_named_t	zm_stime;
	kstat_named_t	zm_wtime;
	kstat_named_t	zm_avenrun1;
	kstat_named_t	zm_avenrun5;
	kstat_named_t	zm_avenrun15;
} zone_misc_kstat_t;

/*
 * Kernel representation of a zone.
 *
 * Locking rules, where they are documented, are given in the comment that
 * accompanies each field or group of fields.  This structure is examined by
 * debuggers, so changes to its layout are visible outside the kernel.
 */
typedef struct zone {
	/*
	 * zone_name is never modified once set.
	 */
	char		*zone_name;	/* zone's configuration name */
	/*
	 * zone_nodename and zone_domain are never freed once allocated.
	 */
	char		*zone_nodename;	/* utsname.nodename equivalent */
	char		*zone_domain;	/* srpc_domain equivalent */
	/*
	 * zone_hostid is used for per-zone hostid emulation.
	 * Currently it isn't modified after it's set (so no locks protect
	 * accesses), but that might have to change when we allow
	 * administrators to change running zones' properties.
	 *
	 * The global zone's zone_hostid must always be HW_INVALID_HOSTID so
	 * that zone_get_hostid() will function correctly.
	 */
	uint32_t	zone_hostid;	/* zone's hostid, HW_INVALID_HOSTID */
					/* if not emulated */
	/*
	 * zone_lock protects the following fields of a zone_t:
	 *	zone_ref
	 *	zone_cred_ref
	 *	zone_subsys_ref
	 *	zone_ref_list
	 *	zone_ntasks
	 *	zone_flags
	 *	zone_zsd
	 *	zone_pfexecd
	 *	zone_dl_list
	 */
	kmutex_t	zone_lock;
	/*
	 * zone_linkage is the zone's linkage into the active or
	 * death-row list.  The field is protected by zonehash_lock.
	 */
	list_node_t	zone_linkage;
	zoneid_t	zone_id;	/* ID of zone */
	zoneid_t	zone_did;	/* persistent debug ID of zone */
	uint_t		zone_ref;	/* count of zone_hold()s on zone */
	uint_t		zone_cred_ref;	/* count of zone_hold_cred()s on zone */
	/*
	 * Fixed-sized array of subsystem-specific reference counts
	 * The sum of all of the counts must be less than or equal to zone_ref.
	 * The array is indexed by the counts' subsystems' zone_ref_subsys_t
	 * constants.
	 */
	uint_t		zone_subsys_ref[ZONE_REF_NUM_SUBSYS];
	list_t		zone_ref_list;	/* list of zone_ref_t structs */
	/*
	 * zone_rootvp and zone_rootpath can never be modified once set.
	 */
	struct vnode	*zone_rootvp;	/* zone's root vnode */
	char		*zone_rootpath;	/* Path to zone's root + '/' */
	ushort_t	zone_flags;	/* misc flags (ZF_*) */
	zone_status_t	zone_status;	/* protected by zone_status_lock */
	uint_t		zone_ntasks;	/* number of tasks executing in zone */
	kmutex_t	zone_nlwps_lock; /* protects zone_nlwps, and *_nlwps */
					/* counters in projects and tasks */
					/* that are within the zone */
	rctl_qty_t	zone_nlwps;	/* number of lwps in zone */
	rctl_qty_t	zone_nlwps_ctl; /* protected by zone_rctls->rcs_lock */
	rctl_qty_t	zone_shmmax;	/* System V shared memory usage */
	ipc_rqty_t	zone_ipc;	/* System V IPC id resource usage */

	uint_t		zone_rootpathlen; /* strlen(zone_rootpath) + 1 */
	uint32_t	zone_shares;	/* FSS shares allocated to zone */
	rctl_set_t	*zone_rctls;	/* zone-wide (zone.*) rctls */
	kmutex_t	zone_mem_lock;	/* protects zone_locked_mem and */
					/* kpd_locked_mem for all */
					/* projects in zone. */
					/* Also protects zone_max_swap */
					/* grab after p_lock, before rcs_lock */
	rctl_qty_t	zone_locked_mem;	/* bytes of locked memory in */
						/* zone */
	rctl_qty_t	zone_locked_mem_ctl;	/* Current locked memory */
						/* limit.  Protected by */
						/* zone_rctls->rcs_lock */
	rctl_qty_t	zone_max_swap; /* bytes of swap reserved by zone */
	rctl_qty_t	zone_max_swap_ctl;	/* current swap limit. */
						/* Protected by */
						/* zone_rctls->rcs_lock */
	kmutex_t	zone_rctl_lock;	/* protects zone_max_lofi */
	rctl_qty_t	zone_max_lofi; /* lofi devs for zone */
	rctl_qty_t	zone_max_lofi_ctl;	/* current lofi limit. */
						/* Protected by */
						/* zone_rctls->rcs_lock */
	list_t		zone_zsd;	/* list of Zone-Specific Data values */
	kcondvar_t	zone_cv;	/* used to signal state changes */
	struct proc	*zone_zsched;	/* Dummy kernel "zsched" process */
	pid_t		zone_proc_initpid; /* pid of "init" for this zone */
	char		*zone_initname;	/* fs path to 'init' */
	int		zone_boot_err;  /* for zone_boot() if boot fails */
	char		*zone_bootargs;	/* arguments passed via zone_boot() */
	rctl_qty_t	zone_phys_mem_ctl;	/* current phys. memory limit */
	/*
	 * zone_kthreads is protected by zone_status_lock.
	 */
	kthread_t	*zone_kthreads;	/* kernel threads in zone */
	struct priv_set	*zone_privset;	/* limit set for zone */
	/*
	 * zone_vfslist is protected by vfs_list_lock().
	 */
	struct vfs	*zone_vfslist;	/* list of FS's mounted in zone */
	uint64_t	zone_uniqid;	/* unique zone generation number */
	struct cred	*zone_kcred;	/* kcred-like, zone-limited cred */
	/*
	 * zone_pool is protected by pool_lock().
	 */
	struct pool	*zone_pool;	/* pool the zone is bound to */
	hrtime_t	zone_pool_mod;	/* last pool bind modification time */
	/* zone_psetid is protected by cpu_lock */
	psetid_t	zone_psetid;	/* pset the zone is bound to */
	/*
	 * The following two can be read without holding any locks.  They are
	 * updated under cpu_lock.
	 */
	int		zone_ncpus;  /* zone's idea of ncpus */
	int		zone_ncpus_online; /* zone's idea of ncpus_online */
	/*
	 * List of ZFS datasets exported to this zone.
	 */
	list_t		zone_datasets;	/* list of datasets */

	ts_label_t	*zone_slabel;	/* zone sensitivity label */
	int		zone_match;	/* require label match for packets */
	tsol_mlp_list_t zone_mlps;	/* MLPs on zone-private addresses */

	boolean_t	zone_restart_init;	/* Restart init if it dies? */
	struct brand	*zone_brand;		/* zone's brand */
	void 		*zone_brand_data;	/* store brand specific data */
	id_t		zone_defaultcid;	/* dflt scheduling class id */
	/*
	 * NOTE(review): the two kstats below are presumably the rctl kstats
	 * (see zone_kstat_t) for swap reservation and locked memory --
	 * confirm against the code that creates them.
	 */
	kstat_t		*zone_swapresv_kstat;
	kstat_t		*zone_lockedmem_kstat;
	/*
	 * zone_dl_list is protected by zone_lock
	 */
	list_t		zone_dl_list;
	netstack_t	*zone_netstack;
	struct cpucap	*zone_cpucap;	/* CPU caps data */

	/*
	 * Data and counters used for ZFS fair-share disk IO.
	 */
	rctl_qty_t	zone_zfs_io_pri;	/* ZFS IO priority */
	uint_t		zone_zfs_queued;	/* enqueued count */
	uint64_t	zone_zfs_weight;	/* used to prevent starvation */
	uint64_t	zone_io_util;		/* IO utilization metric */
	boolean_t	zone_io_util_above_avg;	/* IO util percent > avg. */
	uint16_t	zone_io_delay;		/* IO delay on logical r/w */
	kmutex_t	zone_stg_io_lock;	/* protects IO window data */
	sys_zio_cntr_t	zone_rd_ops;		/* Counters for ZFS reads, */
	sys_zio_cntr_t	zone_wr_ops;		/* writes and logical writes. */
	sys_zio_cntr_t	zone_lwr_ops;

	/*
	 * kstats and counters for VFS ops and bytes.
	 */
	kmutex_t	zone_vfs_lock;		/* protects VFS statistics */
	kstat_t		*zone_vfs_ksp;
	kstat_io_t	zone_vfs_rwstats;
	zone_vfs_kstat_t *zone_vfs_stats;

	/*
	 * kstats for ZFS I/O ops and bytes.
	 */
	kmutex_t	zone_zfs_lock;		/* protects ZFS statistics */
	kstat_t		*zone_zfs_ksp;
	kstat_io_t	zone_zfs_rwstats;
	zone_zfs_kstat_t *zone_zfs_stats;

	/*
	 * Solaris Auditing per-zone audit context
	 */
	struct au_kcontext	*zone_audit_kctxt;
	/*
	 * For private use by mntfs.
	 */
	struct mntelem	*zone_mntfs_db;
	krwlock_t	zone_mntfs_db_lock;

	struct klpd_reg		*zone_pfexecd;	/* protected by zone_lock */

	/*
	 * NOTE(review): presumably the value of the ZONE_ATTR_FS_ALLOWED
	 * attribute -- confirm against the attribute get/set code.
	 */
	char		*zone_fs_allowed;
	rctl_qty_t	zone_nprocs;	/* number of processes in the zone */
	rctl_qty_t	zone_nprocs_ctl;	/* current limit protected by */
						/* zone_rctls->rcs_lock */
	kstat_t		*zone_nprocs_kstat;

	/*
	 * kstats and counters for physical memory capping.
	 */
	rctl_qty_t	zone_phys_mem;	/* current bytes of phys. mem. (RSS) */
	kstat_t		*zone_physmem_kstat;
	uint64_t	zone_mcap_nover;	/* # of times over phys. cap */
	uint64_t	zone_mcap_pagedout;	/* bytes of mem. paged out */
	kmutex_t	zone_mcap_lock;	/* protects mcap statistics */
	kstat_t		*zone_mcap_ksp;
	zone_mcap_kstat_t *zone_mcap_stats;
	uint64_t	zone_pgpgin;		/* pages paged in */
	uint64_t	zone_anonpgin;		/* anon pages paged in */
	uint64_t	zone_execpgin;		/* exec pages paged in */
	uint64_t	zone_fspgin;		/* fs pages paged in */
	uint64_t	zone_anon_alloc_fail;	/* cnt of anon alloc fails */

	/*
	 * Misc. kstats and counters for zone cpu-usage aggregation.
	 * The zone_Xtime values are the sum of the micro-state accounting
	 * values for all threads that are running or have run in the zone.
	 * This is tracked in msacct.c as threads change state.
	 * The zone_stime is the sum of the LMS_SYSTEM times.
	 * The zone_utime is the sum of the LMS_USER times.
	 * The zone_wtime is the sum of the LMS_WAIT_CPU times.
	 * As with per-thread micro-state accounting values, these values are
	 * not scaled to nanosecs.  The scaling is done by the
	 * zone_misc_kstat_update function when kstats are requested.
	 */
	kmutex_t	zone_misc_lock;		/* protects misc statistics */
	kstat_t		*zone_misc_ksp;
	zone_misc_kstat_t *zone_misc_stats;
	uint64_t	zone_stime;		/* total system time */
	uint64_t	zone_utime;		/* total user time */
	uint64_t	zone_wtime;		/* total time waiting in runq */

	struct loadavg_s zone_loadavg;		/* loadavg for this zone */
	uint64_t	zone_hp_avenrun[3];	/* high-precision avenrun */
	int		zone_avenrun[3];	/* FSCALED avg. run queue len */
} zone_t;

/*
 * Special value of zone_psetid to indicate that pools are disabled.
 */
#define	ZONE_PS_INVAL	PS_MYID


/*
 * Global zone state.  zone0 is a zone_t object and global_zone a pointer to
 * a zone_t.  NOTE(review): presumably global_zone points at zone0 -- confirm
 * against the definitions.
 */
extern zone_t zone0;
extern zone_t *global_zone;
extern uint_t maxzones;
/* rctl handles for the per-zone lwp and process limits */
extern rctl_hndl_t rc_zone_nlwps;
extern rctl_hndl_t rc_zone_nprocs;

/*
 * Kernel entry points for zone management.
 *
 * NOTE(review): the leading int argument of zone() is presumably one of the
 * ZONE_* system call subcodes, with the remaining arguments interpreted per
 * subcode -- confirm against the implementation.
 */
extern long zone(int, void *, void *, void *, void *);
extern void zone_zsd_init(void);
extern void zone_init(void);

/*
 * Reference management.  zone_hold_ref() and zone_rele_ref() take the
 * zone_ref_subsys_t constant of the calling subsystem so that the hold can
 * be attributed to it; a zone_ref_t is set up with zone_init_ref().
 */
extern void zone_hold(zone_t *);
extern void zone_rele(zone_t *);
extern void zone_init_ref(zone_ref_t *);
extern void zone_hold_ref(zone_t *, zone_ref_t *, zone_ref_subsys_t);
extern void zone_rele_ref(zone_ref_t *, zone_ref_subsys_t);
extern void zone_cred_hold(zone_t *);
extern void zone_cred_rele(zone_t *);
extern void zone_task_hold(zone_t *);
extern void zone_task_rele(zone_t *);

/*
 * Zone lookup.
 * NOTE(review): whether the zone_find_by_*() routines return the zone with
 * a hold that the caller must release is not visible in this header --
 * confirm against the implementation before relying on either behaviour.
 */
extern zone_t *zone_find_by_id(zoneid_t);
extern zone_t *zone_find_by_label(const ts_label_t *);
extern zone_t *zone_find_by_name(char *);
extern zone_t *zone_find_by_any_path(const char *, boolean_t);
extern zone_t *zone_find_by_path(const char *);
extern zoneid_t getzoneid(void);
extern zoneid_t getzonedid(void);
extern zone_t *zone_find_by_id_nolock(zoneid_t);

/*
 * Datalink helpers.  zone_datalink_walk() takes a callback that receives a
 * datalink ID and the caller's void * argument.
 */
extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *);
extern int zone_check_datalink(zoneid_t *, datalink_id_t);

/*
 * Declared with (void) rather than an empty parameter list so that this is
 * a real prototype and calls that pass arguments are rejected at compile
 * time.
 */
extern void zone_loadavg_update(void);

/*
 * Zone-specific data (ZSD) APIs
 */
/*
 * The following is what code should be initializing its zone_key_t to if it
 * calls zone_getspecific() without necessarily knowing that zone_key_create()
 * has been called on the key.
 */
#define	ZONE_KEY_UNINITIALIZED	0

typedef uint_t zone_key_t;

/*
 * zone_key_create() takes the key to fill in and three callbacks whose
 * signatures match the zsd_create, zsd_shutdown and zsd_destroy members of
 * struct zsd_entry.  NOTE(review): presumably they are passed in that
 * order (create, shutdown, destroy) -- confirm against the implementation.
 */
extern void	zone_key_create(zone_key_t *, void *(*)(zoneid_t),
    void (*)(zoneid_t, void *), void (*)(zoneid_t, void *));
extern int 	zone_key_delete(zone_key_t);
extern void	*zone_getspecific(zone_key_t, zone_t *);
extern int	zone_setspecific(zone_key_t, zone_t *, const void *);

/*
 * The definition of a zsd_entry is truly private to zone.c and is only
 * placed here so it can be shared with mdb.
 *
 * State maintained for each zone times each registered key, which tracks
 * the state of the create, shutdown and destroy callbacks.
 *
 * zsd_flags is used to keep track of pending actions to avoid holding locks
 * when calling the create/shutdown/destroy callbacks, since doing so
 * could lead to deadlocks.
 */
struct zsd_entry {
	zone_key_t		zsd_key;	/* Key used to lookup value */
	void			*zsd_data;	/* Caller-managed value */
	/*
	 * Callbacks to be executed when a zone is created, shutdown, and
	 * destroyed, respectively.  The create callback returns a void *;
	 * the other two are handed a void * along with the zone ID.
	 */
	void			*(*zsd_create)(zoneid_t);
	void			(*zsd_shutdown)(zoneid_t, void *);
	void			(*zsd_destroy)(zoneid_t, void *);
	list_node_t		zsd_linkage;
	uint16_t 		zsd_flags;	/* ZSD_* flags, see below */
	/*
	 * NOTE(review): presumably signalled when zsd_flags changes so that
	 * waiters on an in-progress callback can proceed -- confirm.
	 */
	kcondvar_t		zsd_cv;
};

/*
 * zsd_flags
 *
 * Each callback has its own hex digit: create uses 0x000f, shutdown 0x00f0
 * and destroy 0x0f00.  Within a digit, 1 means the callback is needed, 2
 * that it is in progress and 4 that it has completed.
 */
#define	ZSD_CREATE_NEEDED	0x0001
#define	ZSD_CREATE_INPROGRESS	0x0002
#define	ZSD_CREATE_COMPLETED	0x0004
#define	ZSD_SHUTDOWN_NEEDED	0x0010
#define	ZSD_SHUTDOWN_INPROGRESS	0x0020
#define	ZSD_SHUTDOWN_COMPLETED	0x0040
#define	ZSD_DESTROY_NEEDED	0x0100
#define	ZSD_DESTROY_INPROGRESS	0x0200
#define	ZSD_DESTROY_COMPLETED	0x0400

/* Masks covering every state of one callback. */
#define	ZSD_CREATE_ALL	\
	(ZSD_CREATE_NEEDED|ZSD_CREATE_INPROGRESS|ZSD_CREATE_COMPLETED)
#define	ZSD_SHUTDOWN_ALL	\
	(ZSD_SHUTDOWN_NEEDED|ZSD_SHUTDOWN_INPROGRESS|ZSD_SHUTDOWN_COMPLETED)
#define	ZSD_DESTROY_ALL	\
	(ZSD_DESTROY_NEEDED|ZSD_DESTROY_INPROGRESS|ZSD_DESTROY_COMPLETED)

/* Mask covering the in-progress state of all three callbacks. */
#define	ZSD_ALL_INPROGRESS \
	(ZSD_CREATE_INPROGRESS|ZSD_SHUTDOWN_INPROGRESS|ZSD_DESTROY_INPROGRESS)

/*
 * Macros to help with zone visibility restrictions.
 */

/*
 * True when the process's zone (p_zone) is the global zone.  The test is a
 * pointer comparison against global_zone.
 */
#define	INGLOBALZONE(p) \
	(global_zone == (p)->p_zone)

/*
 * True when the process may view objects belonging to the zone identified
 * by zoneid: either the process lives in that zone, or it lives in the
 * global zone.
 */
#define	HASZONEACCESS(p, zoneid) \
	((zoneid) == (p)->p_zone->zone_id || INGLOBALZONE(p))

/*
 * Tell whether a resolved path can be seen from inside the given zone.
 *
 * A path is visible when it begins with the zone's root path, that is,
 * when the first (zone_rootpathlen - 1) bytes of the path and of
 * zone_rootpath are identical.  zone_rootpath ends in '/', so a path
 * naming the zone root without that trailing '/' does not match.
 */
#define	ZONE_PATH_VISIBLE(path, zone) \
	(0 == strncmp((zone)->zone_rootpath, (path),	\
	    (zone)->zone_rootpathlen - 1))

/*
 * Convenience macro to go from the global view of a path to that seen
 * from within said zone.  It is the responsibility of the caller to
 * ensure that the path is a resolved one (ie, no '..'s or '.'s), and is
 * in fact visible from within the zone.
 *
 * The result points into the caller's path at the '/' that terminates the
 * zone's root path, so nothing is copied.  For example, with a
 * zone_rootpath of "/zones/a/" (zone_rootpathlen 10), "/zones/a/etc/passwd"
 * translates to "/etc/passwd".
 *
 * Visibility is only checked through ASSERT(), and both arguments are
 * evaluated more than once, so they must not have side effects.
 */
#define	ZONE_PATH_TRANSLATE(path, zone)	\
	(ASSERT(ZONE_PATH_VISIBLE(path, zone)),	\
	(path) + (zone)->zone_rootpathlen - 2)

/*
 * Process IDs 0 and 1 are special: those processes are visible in all
 * zones.
 */
#define	ZONE_SPECIALPID(x)	(0 == (x) || 1 == (x))

/*
 * Zone-safe version of thread_create() to be used when the caller wants to
 * create a kernel thread to run within the current zone's context.
 *
 * The function-pointer parameter is declared with an empty parameter list,
 * so the compiler does not check what arguments the thread entry point
 * takes.  NOTE(review): zthread_exit() is presumably how a thread created
 * with zthread_create() terminates -- confirm against the implementation.
 */
extern kthread_t *zthread_create(caddr_t, size_t, void (*)(), void *, size_t,
    pri_t);
extern void zthread_exit(void);

/*
 * Functions for an external observer to register interest in a zone's status
 * change.  Observers will be woken up when the zone status equals the status
 * argument passed in (in the case of zone_status_timedwait, the function may
 * also return because of a timeout; zone_status_wait_sig may return early due
 * to a signal being delivered; zone_status_timedwait_sig may return for any of
 * the above reasons).
 *
 * Otherwise these behave identically to cv_timedwait(), cv_wait(), and
 * cv_wait_sig() respectively.
 *
 * The timed variants take a clock_t and return a clock_t;
 * zone_status_wait() returns nothing and zone_status_wait_sig() returns an
 * int.  See the cv_*() routines named above for what the return values
 * mean.
 */
extern clock_t zone_status_timedwait(zone_t *, clock_t, zone_status_t);
extern clock_t zone_status_timedwait_sig(zone_t *, clock_t, zone_status_t);
extern void zone_status_wait(zone_t *, zone_status_t);
extern int zone_status_wait_sig(zone_t *, zone_status_t);

/*
 * Get the status of the zone (at the time it was called).  The state may
 * have progressed by the time it is returned, so the result is only a
 * snapshot.
 */
extern zone_status_t zone_status_get(zone_t *);

/*
 * Safely get the hostid of the specified zone (defaults to machine's hostid
 * if the specified zone doesn't emulate a hostid).  Passing NULL retrieves
 * the global zone's (i.e., physical system's) hostid.
 */
extern uint32_t zone_get_hostid(zone_t *);

/*
 * Get the "kcred" credentials corresponding to the given zone.  The zone is
 * named by ID rather than by zone_t pointer.
 */
extern struct cred *zone_get_kcred(zoneid_t);

/*
 * Get/set the pool the zone is currently bound to.
 * NOTE(review): zone_t documents zone_pool as protected by pool_lock();
 * presumably callers of these routines must hold it -- confirm.
 */
extern struct pool *zone_pool_get(zone_t *);
extern void zone_pool_set(zone_t *, struct pool *);

/*
 * Get/set the pset the zone is currently using.
 * NOTE(review): zone_t documents zone_psetid as protected by cpu_lock;
 * presumably callers of these routines must hold it -- confirm.
 */
extern psetid_t zone_pset_get(zone_t *);
extern void zone_pset_set(zone_t *, psetid_t);

/*
 * Get the number of cpus/online-cpus visible from the given zone.
 */
extern int zone_ncpus_get(zone_t *);
extern int zone_ncpus_online_get(zone_t *);

/*
 * Returns true if the named pool/dataset is visible in the current zone.
 * NOTE(review): the meaning of the int * out-parameter is not documented
 * in this header -- check the implementation before using it.
 */
extern int zone_dataset_visible(const char *, int *);

/*
 * zone version of kadmin()
 */
extern int zone_kadmin(int, int, const char *, cred_t *);
extern void zone_shutdown_global(void);

extern void mount_in_progress(void);
extern void mount_completed(void);

/*
 * Walk zones, invoking the callback with each zone_t and the caller's
 * void * argument.  NOTE(review): presumably a nonzero return from the
 * callback stops the walk and is returned -- confirm against the
 * implementation.
 */
extern int zone_walk(int (*)(zone_t *, void *), void *);

/* rctl handles for the per-zone memory, swap and lofi limits */
extern rctl_hndl_t rc_zone_locked_mem;
extern rctl_hndl_t rc_zone_max_swap;
extern rctl_hndl_t rc_zone_phys_mem;
extern rctl_hndl_t rc_zone_max_lofi;

#endif	/* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_ZONE_H */