diff options
Diffstat (limited to 'usr/src/uts/common/sys/zone.h')
-rw-r--r-- | usr/src/uts/common/sys/zone.h | 188 |
1 files changed, 171 insertions, 17 deletions
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 27f52c57e2..e871689f5f 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -20,9 +20,9 @@ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015 Joyent, Inc. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>. + * Copyright 2018, Joyent, Inc. */ #ifndef _SYS_ZONE_H @@ -51,15 +51,27 @@ extern "C" { * NOTE * * The contents of this file are private to the implementation of - * Solaris and are subject to change at any time without notice. + * illumos and are subject to change at any time without notice. * Applications and drivers using these interfaces may fail to * run on future releases. */ /* Available both in kernel and for user space */ -/* zone id restrictions and special ids */ -#define MAX_ZONEID 9999 +/* + * zone id restrictions and special ids. + * See 'maxzones' for run-time zone limit. + * + * The current 8k value for MAX_ZONES was originally derived from the virtual + * interface limit in IP when "shared-stack" was the only supported networking + * for zones. The virtual interface limit is the number of addresses allowed + * on an interface (see MAX_ADDRS_PER_IF). Even with exclusive stacks, an 8k + * zone limit is still a reasonable choice at this time, given other limits + * within the kernel. Since we only support 8192 zones (which includes GZ), + * there is no point in allowing MAX_ZONEID > 8k. + */ +#define MAX_ZONES 8192 +#define MAX_ZONEID (MAX_ZONES - 1) #define MIN_USERZONEID 1 /* lowest user-creatable zone ID */ #define MIN_ZONEID 0 /* minimum zone ID on system */ #define GLOBAL_ZONEID 0 @@ -98,14 +110,18 @@ extern "C" { #define ZONE_ATTR_INITNAME 9 #define ZONE_ATTR_BOOTARGS 10 #define ZONE_ATTR_BRAND 11 -#define ZONE_ATTR_PHYS_MCAP 12 -#define ZONE_ATTR_SCHED_CLASS 13 -#define ZONE_ATTR_FLAGS 14 -#define ZONE_ATTR_HOSTID 15 -#define ZONE_ATTR_FS_ALLOWED 16 -#define ZONE_ATTR_NETWORK 17 -#define ZONE_ATTR_INITNORESTART 20 +#define ZONE_ATTR_SCHED_CLASS 12 +#define ZONE_ATTR_FLAGS 13 +#define ZONE_ATTR_HOSTID 14 +#define ZONE_ATTR_FS_ALLOWED 15 +#define ZONE_ATTR_NETWORK 16 +#define ZONE_ATTR_DID 17 +#define ZONE_ATTR_INITNORESTART 18 +#define ZONE_ATTR_APP_SVC_CT 19 +#define ZONE_ATTR_SCHED_FIXEDHI 20 #define ZONE_ATTR_SECFLAGS 21 +#define ZONE_ATTR_INITRESTART0 22 +#define ZONE_ATTR_INITREBOOT 23 /* Start of the brand-specific attribute namespace */ #define ZONE_ATTR_BRAND_ATTRS 32768 @@ -186,6 +202,7 @@ typedef struct { uint32_t doi; /* DOI for label */ caddr32_t label; /* label associated with zone */ int flags; + zoneid_t zoneid; /* requested zoneid */ } zone_def32; #endif typedef struct { @@ -202,6 +219,7 @@ typedef struct { uint32_t doi; /* DOI for label */ const bslabel_t *label; /* label associated with zone */ int flags; + zoneid_t zoneid; /* requested zoneid */ } zone_def; /* extended error information */ @@ -246,9 +264,12 @@ typedef enum zone_cmd { typedef struct zone_cmd_arg { uint64_t uniqid; /* unique "generation number" */ zone_cmd_t cmd; /* requested action */ - uint32_t _pad; /* need consistent 32/64 bit alignmt */ + int status; /* init status on shutdown */ + uint32_t debug; /* enable brand hook debug */ char locale[MAXPATHLEN]; /* locale in which to render messages */ char bootbuf[BOOTARGS_MAX]; /* arguments passed to zone_boot() */ + /* Needed for 32/64 zoneadm -> zoneadmd door arg size check. */ + int pad; } zone_cmd_arg_t; /* @@ -374,7 +395,7 @@ typedef struct zone_dataset { } zone_dataset_t; /* - * structure for zone kstats + * structure for rctl zone kstats */ typedef struct zone_kstat { kstat_named_t zk_zonename; @@ -385,12 +406,57 @@ typedef struct zone_kstat { struct cpucap; typedef struct { + hrtime_t cycle_start; + uint_t cycle_cnt; + hrtime_t zone_avg_cnt; +} sys_zio_cntr_t; + +typedef struct { + kstat_named_t zv_zonename; + kstat_named_t zv_nread; + kstat_named_t zv_reads; + kstat_named_t zv_rtime; + kstat_named_t zv_rlentime; + kstat_named_t zv_rcnt; + kstat_named_t zv_nwritten; + kstat_named_t zv_writes; + kstat_named_t zv_wtime; + kstat_named_t zv_wlentime; + kstat_named_t zv_wcnt; + kstat_named_t zv_10ms_ops; + kstat_named_t zv_100ms_ops; + kstat_named_t zv_1s_ops; + kstat_named_t zv_10s_ops; + kstat_named_t zv_delay_cnt; + kstat_named_t zv_delay_time; +} zone_vfs_kstat_t; + +typedef struct { + kstat_named_t zz_zonename; + kstat_named_t zz_nread; + kstat_named_t zz_reads; + kstat_named_t zz_rtime; + kstat_named_t zz_rlentime; + kstat_named_t zz_nwritten; + kstat_named_t zz_writes; + kstat_named_t zz_waittime; +} zone_zfs_kstat_t; + +typedef struct { kstat_named_t zm_zonename; + kstat_named_t zm_rss; + kstat_named_t zm_phys_cap; + kstat_named_t zm_swap; + kstat_named_t zm_swap_cap; + kstat_named_t zm_nover; + kstat_named_t zm_pagedout; kstat_named_t zm_pgpgin; kstat_named_t zm_anonpgin; kstat_named_t zm_execpgin; kstat_named_t zm_fspgin; kstat_named_t zm_anon_alloc_fail; + kstat_named_t zm_pf_throttle; + kstat_named_t zm_pf_throttle_usec; } zone_mcap_kstat_t; typedef struct { @@ -405,6 +471,7 @@ typedef struct { kstat_named_t zm_ffnoproc; kstat_named_t zm_ffnomem; kstat_named_t zm_ffmisc; + kstat_named_t zm_mfseglim; kstat_named_t zm_nested_intp; kstat_named_t zm_init_pid; kstat_named_t zm_boot_time; @@ -449,6 +516,7 @@ typedef struct zone { */ list_node_t zone_linkage; zoneid_t zone_id; /* ID of zone */ + zoneid_t zone_did; /* persistent debug ID of zone */ uint_t zone_ref; /* count of zone_hold()s on zone */ uint_t zone_cred_ref; /* count of zone_hold_cred()s on zone */ /* @@ -501,10 +569,10 @@ typedef struct zone { kcondvar_t zone_cv; /* used to signal state changes */ struct proc *zone_zsched; /* Dummy kernel "zsched" process */ pid_t zone_proc_initpid; /* pid of "init" for this zone */ - char *zone_initname; /* fs path to 'init' */ + char *zone_initname; /* fs path to 'init' */ + int zone_init_status; /* init's exit status */ int zone_boot_err; /* for zone_boot() if boot fails */ char *zone_bootargs; /* arguments passed via zone_boot() */ - uint64_t zone_phys_mcap; /* physical memory cap */ /* * zone_kthreads is protected by zone_status_lock. */ @@ -542,9 +610,13 @@ typedef struct zone { tsol_mlp_list_t zone_mlps; /* MLPs on zone-private addresses */ boolean_t zone_restart_init; /* Restart init if it dies? */ + boolean_t zone_reboot_on_init_exit; /* Reboot if init dies? */ + boolean_t zone_restart_init_0; /* Restart only if it exits 0 */ + boolean_t zone_setup_app_contract; /* setup contract? */ struct brand *zone_brand; /* zone's brand */ void *zone_brand_data; /* store brand specific data */ id_t zone_defaultcid; /* dflt scheduling class id */ + boolean_t zone_fixed_hipri; /* fixed sched. hi prio */ kstat_t *zone_swapresv_kstat; kstat_t *zone_lockedmem_kstat; /* @@ -553,8 +625,24 @@ typedef struct zone { list_t zone_dl_list; netstack_t *zone_netstack; struct cpucap *zone_cpucap; /* CPU caps data */ + /* - * Solaris Auditing per-zone audit context + * kstats and counters for VFS ops and bytes. + */ + kmutex_t zone_vfs_lock; /* protects VFS statistics */ + kstat_t *zone_vfs_ksp; + kstat_io_t zone_vfs_rwstats; + zone_vfs_kstat_t *zone_vfs_stats; + + /* + * kstats for ZFS I/O ops and bytes. + */ + kmutex_t zone_zfs_lock; /* protects ZFS statistics */ + kstat_t *zone_zfs_ksp; + zone_zfs_kstat_t *zone_zfs_stats; + + /* + * illumos Auditing per-zone audit context */ struct au_kcontext *zone_audit_kctxt; /* @@ -571,7 +659,11 @@ typedef struct zone { /* zone_rctls->rcs_lock */ kstat_t *zone_nprocs_kstat; - kmutex_t zone_mcap_lock; /* protects mcap statistics */ + /* + * kstats and counters for physical memory capping. + */ + kstat_t *zone_physmem_kstat; + kmutex_t zone_mcap_lock; /* protects mcap statistics */ kstat_t *zone_mcap_ksp; zone_mcap_kstat_t *zone_mcap_stats; uint64_t zone_pgpgin; /* pages paged in */ @@ -606,6 +698,8 @@ typedef struct zone { uint32_t zone_ffnomem; /* as_dup/memory error */ uint32_t zone_ffmisc; /* misc. other error */ + uint32_t zone_mfseglim; /* map failure (# segs limit) */ + uint32_t zone_nested_intp; /* nested interp. kstat */ struct loadavg_s zone_loadavg; /* loadavg for this zone */ @@ -633,6 +727,53 @@ typedef struct zone { } zone_t; /* + * Data and counters used for ZFS fair-share disk IO. + */ +typedef struct zone_zfs_io { + uint16_t zpers_zfs_io_pri; /* ZFS IO priority - 16k max */ + uint_t zpers_zfs_queued[2]; /* sync I/O enqueued count */ + sys_zio_cntr_t zpers_rd_ops; /* Counters for ZFS reads, */ + sys_zio_cntr_t zpers_wr_ops; /* writes, and */ + sys_zio_cntr_t zpers_lwr_ops; /* logical writes. */ + kstat_io_t zpers_zfs_rwstats; + uint64_t zpers_io_util; /* IO utilization metric */ + uint64_t zpers_zfs_rd_waittime; + uint8_t zpers_io_delay; /* IO delay on logical r/w */ + uint8_t zpers_zfs_weight; /* used to prevent starvation */ + uint8_t zpers_io_util_above_avg; /* IO util percent > avg. */ +} zone_zfs_io_t; + +/* + * "Persistent" zone data which can be accessed idependently of the zone_t. + */ +typedef struct zone_persist { + kmutex_t zpers_zfs_lock; /* Protects zpers_zfsp references */ + zone_zfs_io_t *zpers_zfsp; /* ZFS fair-share IO data */ + uint8_t zpers_over; /* currently over cap */ + uint32_t zpers_pg_cnt; /* current RSS in pages */ + uint32_t zpers_pg_limit; /* current RRS limit in pages */ + uint32_t zpers_nover; /* # of times over phys. cap */ +#ifndef DEBUG + uint64_t zpers_pg_out; /* # pages flushed */ +#else + /* + * To conserve memory, some detailed kstats are only kept for DEBUG + * builds. + */ + uint64_t zpers_zfs_rd_waittime; + + uint64_t zpers_pg_anon; /* # clean anon pages flushed */ + uint64_t zpers_pg_anondirty; /* # dirty anon pages flushed */ + uint64_t zpers_pg_fs; /* # clean fs pages flushed */ + uint64_t zpers_pg_fsdirty; /* # dirty fs pages flushed */ +#endif +} zone_persist_t; + +typedef enum zone_pageout_op { + ZPO_DIRTY, ZPO_FS, ZPO_ANON, ZPO_ANONDIRTY +} zone_pageout_op_t; + +/* * Special value of zone_psetid to indicate that pools are disabled. */ #define ZONE_PS_INVAL PS_MYID @@ -662,6 +803,7 @@ extern zone_t *zone_find_by_name(char *); extern zone_t *zone_find_by_any_path(const char *, boolean_t); extern zone_t *zone_find_by_path(const char *); extern zoneid_t getzoneid(void); +extern zoneid_t getzonedid(void); extern zone_t *zone_find_by_id_nolock(zoneid_t); extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *); extern int zone_check_datalink(zoneid_t *, datalink_id_t); @@ -842,6 +984,7 @@ extern int zone_ncpus_online_get(zone_t *); * Returns true if the named pool/dataset is visible in the current zone. */ extern int zone_dataset_visible(const char *, int *); +extern int zone_dataset_visible_inzone(zone_t *, const char *, int *); /* * zone version of kadmin() @@ -854,8 +997,19 @@ extern void mount_completed(zone_t *); extern int zone_walk(int (*)(zone_t *, void *), void *); +struct page; +extern void zone_add_page(struct page *); +extern void zone_rm_page(struct page *); +extern void zone_pageout_stat(int, zone_pageout_op_t); +extern void zone_get_physmem_data(int, pgcnt_t *, pgcnt_t *); + +/* Interfaces for page scanning */ +extern uint_t zone_num_over_cap; +extern zone_persist_t zone_pdata[MAX_ZONES]; + extern rctl_hndl_t rc_zone_locked_mem; extern rctl_hndl_t rc_zone_max_swap; +extern rctl_hndl_t rc_zone_phys_mem; extern rctl_hndl_t rc_zone_max_lofi; #endif /* _KERNEL */ |