diff options
Diffstat (limited to 'usr/src/uts/common/sys')
86 files changed, 1913 insertions, 146 deletions
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index 70c8342377..94922f459b 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -259,6 +259,7 @@ CHKHDRS= \ flock.h \ flock_impl.h \ fork.h \ + frameio.h \ fss.h \ fsspriocntl.h \ fsid.h \ @@ -284,6 +285,7 @@ CHKHDRS= \ idmap.h \ ieeefp.h \ id_space.h \ + inotify.h \ instance.h \ int_const.h \ int_fmtio.h \ @@ -496,6 +498,7 @@ CHKHDRS= \ rctl_impl.h \ rds.h \ reboot.h \ + refhash.h \ refstr.h \ refstr_impl.h \ resource.h \ @@ -661,6 +664,8 @@ CHKHDRS= \ vmem.h \ vmem_impl.h \ vmsystm.h \ + vnd.h \ + vnd_errno.h \ vnic.h \ vnic_impl.h \ vnode.h \ @@ -678,6 +683,7 @@ CHKHDRS= \ watchpoint.h \ winlockio.h \ zcons.h \ + zfd.h \ zone.h \ xti_inet.h \ xti_osi.h \ @@ -839,14 +845,14 @@ FSHDRS= \ autofs.h \ decomp.h \ dv_node.h \ - sdev_impl.h \ - sdev_plugin.h \ fifonode.h \ hsfs_isospec.h \ hsfs_node.h \ hsfs_rrip.h \ hsfs_spec.h \ hsfs_susp.h \ + hyprlofs.h \ + hyprlofs_info.h \ lofs_info.h \ lofs_node.h \ mntdata.h \ @@ -856,6 +862,8 @@ FSHDRS= \ pc_label.h \ pc_node.h \ pxfs_ki.h \ + sdev_impl.h \ + sdev_plugin.h \ snode.h \ swapnode.h \ tmp.h \ diff --git a/usr/src/uts/common/sys/acct.h b/usr/src/uts/common/sys/acct.h index f00884681b..e01ad61025 100644 --- a/usr/src/uts/common/sys/acct.h +++ b/usr/src/uts/common/sys/acct.h @@ -22,6 +22,7 @@ /* * Copyright 2014 Garrett D'Amore <garrett@damore.org> * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2017 Joyent, Inc. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -88,7 +89,7 @@ extern int acct(const char *); #if defined(_KERNEL) -void acct(char); +void acct(int); int sysacct(char *); struct vnode; diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h index e591320025..203b884cd3 100644 --- a/usr/src/uts/common/sys/auxv.h +++ b/usr/src/uts/common/sys/auxv.h @@ -79,6 +79,9 @@ typedef struct { #define AT_FLAGS 8 /* processor flags */ #define AT_ENTRY 9 /* a.out entry point */ +/* First introduced on Linux */ +#define AT_RANDOM 25 /* address of 16 random bytes */ + /* * These relate to the original PPC ABI document; Linux reused * the values for other things (see below), so disambiguation of @@ -91,19 +94,18 @@ typedef struct { * These are the values from LSB 1.3, the first five are also described * in the draft amd64 ABI. * - * At the time of writing, Solaris doesn't place any of these values into - * the aux vector, except AT_CLKTCK which is placed on the aux vector for - * lx branded processes; also, we do similar things via AT_SUN_ values. + * At the time of writing, illumos doesn't place any of these values into the + * aux vector, except where noted. We do similar things via AT_SUN_ values. * * AT_NOTELF 10 program is not ELF? - * AT_UID 11 real user id - * AT_EUID 12 effective user id - * AT_GID 13 real group id - * AT_EGID 14 effective group id + * AT_UID 11 real user id (provided in LX) + * AT_EUID 12 effective user id (provided in LX) + * AT_GID 13 real group id (provided in LX) + * AT_EGID 14 effective group id (provided in LX) * * AT_PLATFORM 15 * AT_HWCAP 16 - * AT_CLKTCK 17 c.f. _SC_CLK_TCK + * AT_CLKTCK 17 c.f. 
_SC_CLK_TCK (provided in LX) * AT_FPUCW 18 * * AT_DCACHEBSIZE 19 (moved from 10) @@ -111,6 +113,16 @@ typedef struct { * AT_UCACHEBSIZE 21 (moved from 12) * * AT_IGNOREPPC 22 + * + * On Linux: + * AT_* values 18 through 22 are reserved + * AT_SECURE 23 secure mode boolean (provided in LX) + * AT_BASE_PLATFORM 24 string identifying real platform, may + * differ from AT_PLATFORM. + * AT_HWCAP2 26 extension of AT_HWCAP + * AT_EXECFN 31 filename of program + * AT_SYSINFO 32 + * AT_SYSINFO_EHDR 33 The vDSO location */ /* @@ -190,6 +202,8 @@ extern uint_t getisax(uint32_t *, uint_t); #define AT_SUN_BRAND_AUX1 2020 #define AT_SUN_BRAND_AUX2 2021 #define AT_SUN_BRAND_AUX3 2022 +#define AT_SUN_BRAND_AUX4 2025 +#define AT_SUN_BRAND_NROOT 2024 /* * Aux vector for comm page diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h index a2feda573d..bace1142f9 100644 --- a/usr/src/uts/common/sys/brand.h +++ b/usr/src/uts/common/sys/brand.h @@ -103,28 +103,106 @@ struct brand_mach_ops; struct intpdata; struct execa; +/* + * Common structure to define hooks for brand operation. 
+ * + * Required Fields: + * b_init_brand_data - Setup zone brand data during zone_setbrand + * b_free_brand_data - Free zone brand data during zone_destroy + * b_brandsys - Syscall handler for brandsys + * b_setbrand - Initialize process brand data + * b_getattr - Get brand-custom zone attribute + * b_setattr - Set brand-custom zone attribute + * b_copy_procdata - Copy process brand data during fork + * b_proc_exit - Perform process brand exit processing + * b_exec - Reset branded process state on exec + * b_lwp_setrval - Set return code for forked child + * b_initlwp - Initialize lwp brand data (cannot drop p->p_lock) + * b_forklwp - Copy lwp brand data during fork + * b_freelwp - Free lwp brand data + * b_lwpexit - Perform lwp-specific brand exit processing + * b_elfexec - Load and execute ELF binary + * b_sigset_native_to_brand - Convert sigset native->brand + * b_sigset_brand_to_native - Convert sigset brand->native + * b_nsig - Maximum signal number + * b_sendsig - Update process state after sendsig + * + * Optional Fields: + * b_lwpdata_alloc - Speculatively allocate data for use in b_initlwp + * b_lwpdata_free - Free data allocated by b_lwpdata_alloc if errors occur + * during lwp creation before b_initlwp could be called. 
+ * b_initlwp_post - Complete lwp branding (can temporarily drop p->p_lock) + * b_exit_with_sig - Instead of sending SIGCLD, exit with custom behavior + * b_psig_to_proc - Custom additional behavior during psig + * b_wait_filter - Filter processes from being matched by waitid + * b_native_exec - Provide interpreter path prefix for executables + * b_ptrace_exectrap - Custom behavior for legacy ptrace traps + * b_map32limit - Specify alternate limit for MAP_32BIT mappings + * b_stop_notify - Hook process stop events + * b_waitid_helper - Generate synthetic results for waitid + * b_sigcld_repost - Post synthetic SIGCLD signals + * b_issig_stop - Alter/suppress signal delivery during issig + * b_sig_ignorable - Disallow discarding of signals + * b_savecontext - Alter context during savecontext + * b_restorecontext - Alter context during restorecontext + * b_sendsig_stack - Override stack used for signal delivery + * b_setid_clear - Override setid_clear behavior + * b_pagefault - Trap pagefault events + * b_intp_parse_arg - Controls interpreter argument handling (allow 1 or all) + * b_clearbrand - Perform any actions necessary when clearing the brand. 
+ * b_rpc_statd - Upcall to rpc.statd running within the zone + * b_acct_out - Output properly formatted accounting record + */ struct brand_ops { - void (*b_init_brand_data)(zone_t *); + void (*b_init_brand_data)(zone_t *, kmutex_t *); void (*b_free_brand_data)(zone_t *); int (*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t); + uintptr_t); void (*b_setbrand)(struct proc *); int (*b_getattr)(zone_t *, int, void *, size_t *); int (*b_setattr)(zone_t *, int, void *, size_t); void (*b_copy_procdata)(struct proc *, struct proc *); - void (*b_proc_exit)(struct proc *, klwp_t *); + void (*b_proc_exit)(struct proc *); void (*b_exec)(); void (*b_lwp_setrval)(klwp_t *, int, int); - int (*b_initlwp)(klwp_t *); + void *(*b_lwpdata_alloc)(struct proc *); + void (*b_lwpdata_free)(void *); + void (*b_initlwp)(klwp_t *, void *); + void (*b_initlwp_post)(klwp_t *); void (*b_forklwp)(klwp_t *, klwp_t *); void (*b_freelwp)(klwp_t *); void (*b_lwpexit)(klwp_t *); int (*b_elfexec)(struct vnode *, struct execa *, struct uarg *, - struct intpdata *, int, size_t *, int, caddr_t, struct cred *, - int); + struct intpdata *, int, size_t *, int, caddr_t, struct cred *, + int *); void (*b_sigset_native_to_brand)(sigset_t *); void (*b_sigset_brand_to_native)(sigset_t *); + void (*b_sigfd_translate)(k_siginfo_t *); int b_nsig; + void (*b_exit_with_sig)(proc_t *, sigqueue_t *); + boolean_t (*b_wait_filter)(proc_t *, proc_t *); + boolean_t (*b_native_exec)(uint8_t, const char **); + uint32_t (*b_map32limit)(proc_t *); + void (*b_stop_notify)(proc_t *, klwp_t *, ushort_t, ushort_t); + int (*b_waitid_helper)(idtype_t, id_t, k_siginfo_t *, int, + boolean_t *, int *); + int (*b_sigcld_repost)(proc_t *, sigqueue_t *); + int (*b_issig_stop)(proc_t *, klwp_t *); + boolean_t (*b_sig_ignorable)(proc_t *, klwp_t *, int); + void (*b_savecontext)(ucontext_t *); +#if defined(_SYSCALL32_IMPL) + void (*b_savecontext32)(ucontext32_t *); +#endif + void 
(*b_restorecontext)(ucontext_t *); + caddr_t (*b_sendsig_stack)(int); + void (*b_sendsig)(int); + int (*b_setid_clear)(vattr_t *vap, cred_t *cr); + int (*b_pagefault)(proc_t *, klwp_t *, caddr_t, enum fault_type, + enum seg_rw); + boolean_t b_intp_parse_arg; + void (*b_clearbrand)(proc_t *, boolean_t); + void (*b_rpc_statd)(int, void *, void *); + void (*b_acct_out)(struct vnode *, int); }; /* @@ -135,6 +213,7 @@ typedef struct brand { char *b_name; struct brand_ops *b_ops; struct brand_mach_ops *b_machops; + size_t b_data_size; } brand_t; extern brand_t native_brand; @@ -165,7 +244,7 @@ extern brand_t *brand_register_zone(struct brand_attr *); extern brand_t *brand_find_name(char *); extern void brand_unregister_zone(brand_t *); extern int brand_zone_count(brand_t *); -extern void brand_setbrand(proc_t *); +extern int brand_setbrand(proc_t *, boolean_t); extern void brand_clearbrand(proc_t *, boolean_t); /* @@ -178,17 +257,16 @@ extern int brand_solaris_cmd(int, uintptr_t, uintptr_t, uintptr_t, extern void brand_solaris_copy_procdata(proc_t *, proc_t *, struct brand *); extern int brand_solaris_elfexec(vnode_t *, execa_t *, uarg_t *, - intpdata_t *, int, size_t *, int, caddr_t, cred_t *, int, - struct brand *, char *, char *, char *, char *, char *); + intpdata_t *, int, size_t *, int, caddr_t, cred_t *, int *, + struct brand *, char *, char *, char *); extern void brand_solaris_exec(struct brand *); extern int brand_solaris_fini(char **, struct modlinkage *, struct brand *); extern void brand_solaris_forklwp(klwp_t *, klwp_t *, struct brand *); extern void brand_solaris_freelwp(klwp_t *, struct brand *); -extern int brand_solaris_initlwp(klwp_t *, struct brand *); +extern void brand_solaris_initlwp(klwp_t *, struct brand *); extern void brand_solaris_lwpexit(klwp_t *, struct brand *); -extern void brand_solaris_proc_exit(struct proc *, klwp_t *, - struct brand *); +extern void brand_solaris_proc_exit(struct proc *, struct brand *); extern void 
brand_solaris_setbrand(proc_t *, struct brand *); #if defined(_SYSCALL32) diff --git a/usr/src/uts/common/sys/buf.h b/usr/src/uts/common/sys/buf.h index e20e0e0c35..b6b5c20e44 100644 --- a/usr/src/uts/common/sys/buf.h +++ b/usr/src/uts/common/sys/buf.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 Joyent, Inc. All rights reserved. * * Copyright 2017 RackTop Systems. */ @@ -188,6 +189,7 @@ struct biostats { #define B_STARTED 0x2000000 /* io:::start probe called for buf */ #define B_ABRWRITE 0x4000000 /* Application based recovery active */ #define B_PAGE_NOWAIT 0x8000000 /* Skip the page if it is locked */ +#define B_INVALCURONLY 0x10000000 /* invalidate only for curproc */ /* * There is some confusion over the meaning of B_FREE and B_INVAL and what @@ -200,6 +202,12 @@ struct biostats { * between the sole use of these two flags. In both cases, IO will be done * if the page is not yet committed to storage. * + * The B_INVALCURONLY flag modifies the behavior of the B_INVAL flag and is + * intended to be used in conjunction with B_INVAL. B_INVALCURONLY has no + * meaning on its own. When both B_INVALCURONLY and B_INVAL are set, then + * the mapping for the page is only invalidated for the current process. + * In this case, the page is not destroyed unless this was the final mapping. + * * In order to discard pages without writing them back, (B_INVAL | B_TRUNC) * should be used. * diff --git a/usr/src/uts/common/sys/contract/process.h b/usr/src/uts/common/sys/contract/process.h index 21cf94dcf9..2c70d7c9f1 100644 --- a/usr/src/uts/common/sys/contract/process.h +++ b/usr/src/uts/common/sys/contract/process.h @@ -21,13 +21,12 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. 
*/ #ifndef _SYS_CONTRACT_PROCESS_H #define _SYS_CONTRACT_PROCESS_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/contract.h> #include <sys/time.h> @@ -55,7 +54,8 @@ typedef struct cont_process cont_process_t; #define CT_PR_NOORPHAN 0x2 /* kill when contract is abandoned */ #define CT_PR_PGRPONLY 0x4 /* only kill process group on fatal errors */ #define CT_PR_REGENT 0x8 /* automatically detach inherited contracts */ -#define CT_PR_ALLPARAM 0xf +#define CT_PR_KEEP_EXEC 0x10 /* preserve template across exec */ +#define CT_PR_ALLPARAM 0x1f /* * ctr_ev_* flags diff --git a/usr/src/uts/common/sys/cpucaps.h b/usr/src/uts/common/sys/cpucaps.h index 6063ff4380..6bc042108c 100644 --- a/usr/src/uts/common/sys/cpucaps.h +++ b/usr/src/uts/common/sys/cpucaps.h @@ -22,6 +22,7 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011, 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_CPUCAPS_H @@ -84,12 +85,16 @@ extern void cpucaps_zone_remove(zone_t *); */ extern int cpucaps_project_set(kproject_t *, rctl_qty_t); extern int cpucaps_zone_set(zone_t *, rctl_qty_t); +extern int cpucaps_zone_set_base(zone_t *, rctl_qty_t); +extern int cpucaps_zone_set_burst_time(zone_t *, rctl_qty_t); /* * Get current CPU usage for a project/zone. */ extern rctl_qty_t cpucaps_project_get(kproject_t *); extern rctl_qty_t cpucaps_zone_get(zone_t *); +extern rctl_qty_t cpucaps_zone_get_base(zone_t *); +extern rctl_qty_t cpucaps_zone_get_burst_time(zone_t *); /* * Scheduling class hooks into CPU caps framework. diff --git a/usr/src/uts/common/sys/cpucaps_impl.h b/usr/src/uts/common/sys/cpucaps_impl.h index 95afd21827..2cd4ed644d 100644 --- a/usr/src/uts/common/sys/cpucaps_impl.h +++ b/usr/src/uts/common/sys/cpucaps_impl.h @@ -22,6 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011, 2012, Joyent, Inc. All rights reserved. 
*/ #ifndef _SYS_CPUCAPS_IMPL_H @@ -66,8 +67,12 @@ typedef struct cpucap { waitq_t cap_waitq; /* waitq for capped threads */ kstat_t *cap_kstat; /* cpucaps specific kstat */ int64_t cap_gen; /* zone cap specific */ + hrtime_t cap_chk_value; /* effective CPU usage cap */ hrtime_t cap_value; /* scaled CPU usage cap */ hrtime_t cap_usage; /* current CPU usage */ + hrtime_t cap_base; /* base CPU for burst */ + u_longlong_t cap_burst_limit; /* max secs (in tics) for a burst */ + u_longlong_t cap_bursting; /* # of ticks currently bursting */ disp_lock_t cap_usagelock; /* protects cap_usage above */ /* * Per cap statistics. @@ -75,6 +80,7 @@ typedef struct cpucap { hrtime_t cap_maxusage; /* maximum cap usage */ u_longlong_t cap_below; /* # of ticks spend below the cap */ u_longlong_t cap_above; /* # of ticks spend above the cap */ + u_longlong_t cap_above_base; /* # of ticks spent above the base */ } cpucap_t; /* diff --git a/usr/src/uts/common/sys/cpuvar.h b/usr/src/uts/common/sys/cpuvar.h index 7b153a3e9e..24adbb7418 100644 --- a/usr/src/uts/common/sys/cpuvar.h +++ b/usr/src/uts/common/sys/cpuvar.h @@ -390,6 +390,8 @@ extern cpu_core_t cpu_core[]; #define CPU_DISP_DONTSTEAL 0x01 /* CPU undergoing context swtch */ #define CPU_DISP_HALTED 0x02 /* CPU halted waiting for interrupt */ +/* Note: inside ifdef: _KERNEL || _KMEMUSER || _BOOT */ + /* * Macros for manipulating sets of CPUs as a bitmap. 
Note that this * bitmap may vary in size depending on the maximum CPU id a specific @@ -512,6 +514,7 @@ extern struct cpu **cpu_seq; /* indexed by sequential CPU id */ extern cpu_t *cpu_list; /* list of CPUs */ extern cpu_t *cpu_active; /* list of active CPUs */ extern cpuset_t cpu_active_set; /* cached set of active CPUs */ +extern cpuset_t cpu_available; /* cached set of available CPUs */ extern int ncpus; /* number of CPUs present */ extern int ncpus_online; /* number of CPUs not quiesced */ extern int ncpus_intr_enabled; /* nr of CPUs taking I/O intrs */ diff --git a/usr/src/uts/common/sys/cred.h b/usr/src/uts/common/sys/cred.h index fb79dfecde..1f938132e0 100644 --- a/usr/src/uts/common/sys/cred.h +++ b/usr/src/uts/common/sys/cred.h @@ -93,6 +93,7 @@ extern gid_t crgetgid(const cred_t *); extern gid_t crgetrgid(const cred_t *); extern gid_t crgetsgid(const cred_t *); extern zoneid_t crgetzoneid(const cred_t *); +extern zoneid_t crgetzonedid(const cred_t *); extern projid_t crgetprojid(const cred_t *); extern cred_t *crgetmapped(const cred_t *); diff --git a/usr/src/uts/common/sys/dktp/dadk.h b/usr/src/uts/common/sys/dktp/dadk.h index f5c990e7c0..2178ad1f0d 100644 --- a/usr/src/uts/common/sys/dktp/dadk.h +++ b/usr/src/uts/common/sys/dktp/dadk.h @@ -65,6 +65,8 @@ struct dadk { kstat_t *dad_errstats; /* error stats */ kmutex_t dad_cmd_mutex; int dad_cmd_count; + uint32_t dad_err_cnt; /* number of recent errors */ + hrtime_t dad_last_log; /* time of last error log */ }; #define DAD_SECSIZ dad_phyg.g_secsiz diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index de7ac46db4..b73d22249a 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -192,6 +192,7 @@ typedef struct dld_ioc_rename { datalink_id_t dir_linkid1; datalink_id_t dir_linkid2; char dir_link[MAXLINKNAMELEN]; + boolean_t dir_zoneinit; } dld_ioc_rename_t; /* @@ -204,6 +205,7 @@ typedef struct dld_ioc_rename { typedef struct dld_ioc_zid { zoneid_t diz_zid; 
datalink_id_t diz_linkid; + boolean_t diz_transient; } dld_ioc_zid_t; /* @@ -356,6 +358,7 @@ typedef struct dld_ioc_led { #define DLD_CAPAB_POLL 0x00000002 #define DLD_CAPAB_PERIM 0x00000003 #define DLD_CAPAB_LSO 0x00000004 +#define DLD_CAPAB_IPCHECK 0x00000005 #define DLD_ENABLE 0x00000001 #define DLD_DISABLE 0x00000002 @@ -382,6 +385,7 @@ typedef struct dld_ioc_led { */ typedef int (*dld_capab_func_t)(void *, uint_t, void *, uint_t); +#define DI_DIRECT_RAW 0x1 /* * Direct Tx/Rx capability. */ @@ -406,8 +410,16 @@ typedef struct dld_capab_direct_s { /* flow control "can I put on a ring" callback */ uintptr_t di_tx_fctl_df; /* canput-like callback */ void *di_tx_fctl_dh; + + /* flags that control our behavior */ + uint_t di_flags; } dld_capab_direct_t; +typedef struct dld_capab_ipcheck_s { + uintptr_t ipc_allowed_df; + void *ipc_allowed_dh; +} dld_capab_ipcheck_t; + /* * Polling/softring capability. */ diff --git a/usr/src/uts/common/sys/dld_impl.h b/usr/src/uts/common/sys/dld_impl.h index 035eea893a..336fa9cb67 100644 --- a/usr/src/uts/common/sys/dld_impl.h +++ b/usr/src/uts/common/sys/dld_impl.h @@ -53,7 +53,8 @@ typedef enum { typedef enum { DLD_UNINITIALIZED, DLD_PASSIVE, - DLD_ACTIVE + DLD_ACTIVE, + DLD_EXCLUSIVE } dld_passivestate_t; /* @@ -256,6 +257,8 @@ extern void dld_str_rx_unitdata(void *, mac_resource_handle_t, extern void dld_str_notify_ind(dld_str_t *); extern mac_tx_cookie_t str_mdata_fastpath_put(dld_str_t *, mblk_t *, uintptr_t, uint16_t); +extern mac_tx_cookie_t str_mdata_raw_fastpath_put(dld_str_t *, mblk_t *, + uintptr_t, uint16_t); extern int dld_flow_ctl_callb(dld_str_t *, uint64_t, int (*func)(), void *); diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h index e9ac27cddd..e71a55ab84 100644 --- a/usr/src/uts/common/sys/dlpi.h +++ b/usr/src/uts/common/sys/dlpi.h @@ -109,6 +109,7 @@ typedef struct dl_ipnetinfo { #define DL_PASSIVE_REQ 0x114 /* Allow access to aggregated link */ #define DL_INTR_MODE_REQ 0x115 /* Request 
Rx processing in INTR mode */ #define DL_NOTIFY_CONF 0x116 /* Notification from upstream */ +#define DL_EXCLUSIVE_REQ 0x117 /* Make bind active */ /* * Primitives used for Connectionless Service @@ -391,6 +392,7 @@ typedef struct dl_ipnetinfo { #define DL_PROMISC_SAP 0x02 /* promiscuous mode at sap level */ #define DL_PROMISC_MULTI 0x03 /* promiscuous mode for multicast */ #define DL_PROMISC_RX_ONLY 0x04 /* above only enabled for rx */ +#define DL_PROMISC_FIXUPS 0x05 /* above will be fixed up */ /* * DLPI notification codes for DL_NOTIFY_REQ primitives. @@ -1085,6 +1087,13 @@ typedef struct { } dl_intr_mode_req_t; /* + * DL_EXCLUSIVE_REQ, M_PROTO type + */ +typedef struct { + t_uscalar_t dl_primitive; +} dl_exclusive_req_t; + +/* * CONNECTION-ORIENTED SERVICE PRIMITIVES */ @@ -1506,6 +1515,7 @@ union DL_primitives { dl_control_ack_t control_ack; dl_passive_req_t passive_req; dl_intr_mode_req_t intr_mode_req; + dl_exclusive_req_t exclusive_req; }; #define DL_INFO_REQ_SIZE sizeof (dl_info_req_t) @@ -1574,6 +1584,7 @@ union DL_primitives { #define DL_CONTROL_ACK_SIZE sizeof (dl_control_ack_t) #define DL_PASSIVE_REQ_SIZE sizeof (dl_passive_req_t) #define DL_INTR_MODE_REQ_SIZE sizeof (dl_intr_mode_req_t) +#define DL_EXCLUSIVE_REQ_SIZE sizeof (dl_exclusive_req_t) #ifdef _KERNEL /* diff --git a/usr/src/uts/common/sys/dls.h b/usr/src/uts/common/sys/dls.h index cd3749dc21..0c5ffb0dd7 100644 --- a/usr/src/uts/common/sys/dls.h +++ b/usr/src/uts/common/sys/dls.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. 
*/ #ifndef _SYS_DLS_H @@ -86,6 +87,7 @@ typedef struct dls_link_s dls_link_t; #define DLS_PROMISC_MULTI 0x00000002 #define DLS_PROMISC_PHYS 0x00000004 #define DLS_PROMISC_RX_ONLY 0x00000008 +#define DLS_PROMISC_FIXUPS 0x00000010 extern int dls_open(dls_link_t *, dls_dl_handle_t, dld_str_t *); extern void dls_close(dld_str_t *); @@ -107,11 +109,13 @@ extern void str_notify(void *, mac_notify_type_t); extern int dls_devnet_open(const char *, dls_dl_handle_t *, dev_t *); +extern int dls_devnet_open_in_zone(const char *, + dls_dl_handle_t *, dev_t *, zoneid_t); extern void dls_devnet_close(dls_dl_handle_t); extern boolean_t dls_devnet_rebuild(); extern int dls_devnet_rename(datalink_id_t, datalink_id_t, - const char *); + const char *, boolean_t); extern int dls_devnet_create(mac_handle_t, datalink_id_t, zoneid_t); extern int dls_devnet_destroy(mac_handle_t, datalink_id_t *, @@ -129,7 +133,7 @@ extern uint16_t dls_devnet_vid(dls_dl_handle_t); extern datalink_id_t dls_devnet_linkid(dls_dl_handle_t); extern int dls_devnet_dev2linkid(dev_t, datalink_id_t *); extern int dls_devnet_phydev(datalink_id_t, dev_t *); -extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t); +extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t, boolean_t); extern zoneid_t dls_devnet_getzid(dls_dl_handle_t); extern zoneid_t dls_devnet_getownerzid(dls_dl_handle_t); extern boolean_t dls_devnet_islinkvisible(datalink_id_t, zoneid_t); @@ -143,6 +147,8 @@ extern int dls_mgmt_update(const char *, uint32_t, boolean_t, extern int dls_mgmt_get_linkinfo(datalink_id_t, char *, datalink_class_t *, uint32_t *, uint32_t *); extern int dls_mgmt_get_linkid(const char *, datalink_id_t *); +extern int dls_mgmt_get_linkid_in_zone(const char *, + datalink_id_t *, zoneid_t); extern datalink_id_t dls_mgmt_get_next(datalink_id_t, datalink_class_t, datalink_media_t, uint32_t); extern int dls_devnet_macname2linkid(const char *, diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h index 
cd13a41413..329f8dd08e 100644 --- a/usr/src/uts/common/sys/dls_impl.h +++ b/usr/src/uts/common/sys/dls_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_DLS_IMPL_H @@ -62,6 +63,7 @@ struct dls_link_s { /* Protected by */ uint_t dl_zone_ref; link_tagmode_t dl_tagmode; /* atomic */ uint_t dl_nonip_cnt; /* SL */ + uint_t dl_exclusive; /* SL */ }; typedef struct dls_head_s { @@ -97,7 +99,8 @@ extern void dls_create_str_kstats(dld_str_t *); extern int dls_stat_update(kstat_t *, dls_link_t *, int); extern int dls_stat_create(const char *, int, const char *, zoneid_t, int (*)(struct kstat *, int), void *, - kstat_t **); + kstat_t **, zoneid_t); +extern void dls_stat_delete(kstat_t *); extern int dls_devnet_open_by_dev(dev_t, dls_link_t **, dls_dl_handle_t *); @@ -129,6 +132,7 @@ extern void dls_mgmt_init(void); extern void dls_mgmt_fini(void); extern int dls_mgmt_get_phydev(datalink_id_t, dev_t *); +extern int dls_exclusive_set(dld_str_t *, boolean_t); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/dls_mgmt.h b/usr/src/uts/common/sys/dls_mgmt.h index b60e53b267..6fec277991 100644 --- a/usr/src/uts/common/sys/dls_mgmt.h +++ b/usr/src/uts/common/sys/dls_mgmt.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2015, Joyent, Inc. + * Copyright 2016 Joyent, Inc. */ #ifndef _DLS_MGMT_H @@ -114,10 +114,14 @@ typedef uint64_t datalink_media_t; #define DLMGMT_CMD_BASE 128 /* - * Indicate the link mapping is active or persistent + * Indicate if the link mapping is active, persistent, or transient. A + * transient link is an active link with a twist -- it is an active + * link which is destroyed along with the zone rather than reassigned + * to the GZ. 
*/ #define DLMGMT_ACTIVE 0x01 #define DLMGMT_PERSIST 0x02 +#define DLMGMT_TRANSIENT 0x04 /* upcall argument */ typedef struct dlmgmt_door_arg { @@ -168,6 +172,7 @@ typedef struct dlmgmt_door_getname { typedef struct dlmgmt_door_getlinkid { int ld_cmd; char ld_link[MAXLINKNAMELEN]; + zoneid_t ld_zoneid; } dlmgmt_door_getlinkid_t; typedef struct dlmgmt_door_getnext_s { diff --git a/usr/src/uts/common/sys/dumpadm.h b/usr/src/uts/common/sys/dumpadm.h index 616828bb2b..8ca10ff3c5 100644 --- a/usr/src/uts/common/sys/dumpadm.h +++ b/usr/src/uts/common/sys/dumpadm.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. + * Copyright 2019 Joyent, Inc. */ #ifndef _SYS_DUMPADM_H @@ -44,11 +45,13 @@ extern "C" { #define DIOCSETUUID (DDIOC | 0x17) #define DIOCGETUUID (DDIOC | 0x18) #define DIOCRMDEV (DDIOC | 0x19) +#define DIOCSCRYPTKEY (DDIOC | 0x1a) /* * Kernel-controlled dump state flags for dump_conflags */ #define DUMP_EXCL 0x00000001 /* dedicated dump device (not swap) */ +#define DUMP_ENCRYPT 0x00000002 /* encrypt dump */ #define DUMP_STATE 0x0000ffff /* the set of all kernel flags */ /* diff --git a/usr/src/uts/common/sys/dumphdr.h b/usr/src/uts/common/sys/dumphdr.h index 2019f60a5d..aa2fbde7a5 100644 --- a/usr/src/uts/common/sys/dumphdr.h +++ b/usr/src/uts/common/sys/dumphdr.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016 by Delphix. All rights reserved. + * Copyright 2019 Joyent, Inc. 
*/ #ifndef _SYS_DUMPHDR_H @@ -60,6 +61,22 @@ extern "C" { sizeof (summary_dump_t) + 1024), \ DUMP_OFFSET)) /* summary save area */ +#define DUMP_CRYPT_KEYLEN 32 /* byte len for crypto key */ +#define DUMP_CRYPT_NONCELEN 8 /* byte len for nonce */ +#define DUMP_CRYPT_HMACLEN 64 /* byte len for HMAC */ +#define DUMP_CRYPT_BLOCKSHIFT 6 /* 64-byte blocks */ + +#define DUMP_CRYPT_ALGO_NONE 0 /* dump not encrypted */ +#define DUMP_CRYPT_ALGO_CHACHA20 1 /* ChaCha20 */ + +#if DUMP_OFFSET & ((1 << DUMP_CRYPT_BLOCKSHIFT) - 1) +#error DUMP_OFFSET not DUMP_CRYPT_BLOCKSHIFT aligned +#endif + +#if DUMP_LOGSIZE & ((1 << DUMP_CRYPT_BLOCKSHIFT) - 1) +#error DUMP_LOGSIZE not DUMP_CRYPT_BLOCKSHIFT aligned +#endif + typedef struct dumphdr { uint32_t dump_magic; /* magic number */ uint32_t dump_version; /* version number */ @@ -86,12 +103,22 @@ typedef struct dumphdr { } dumphdr_t; /* + * If DF_ENCRYPTED is set, this header will be found after the dumphdr. + */ +typedef struct dump_crypt { + uint8_t dump_crypt_algo; /* encryption algorithm */ + uint8_t dump_crypt_hmac[DUMP_CRYPT_HMACLEN]; /* HMAC for crypto key */ + uint8_t dump_crypt_nonce[DUMP_CRYPT_NONCELEN]; /* encryption nonce */ +} dump_crypt_t; + +/* * Values for dump_flags */ #define DF_VALID 0x00000001 /* Dump is valid (savecore clears) */ #define DF_COMPLETE 0x00000002 /* All pages present as configured */ #define DF_LIVE 0x00000004 /* Dump was taken on a live system */ #define DF_COMPRESSED 0x00000008 /* Dump is compressed */ +#define DF_ENCRYPTED 0x00000010 /* Dump is encrypted */ #define DF_KERNEL 0x00010000 /* Contains kernel pages only */ #define DF_ALL 0x00020000 /* Contains all pages */ #define DF_CURPROC 0x00040000 /* Contains kernel + cur proc pages */ @@ -175,6 +202,8 @@ extern u_offset_t dumpvp_size; extern struct dumphdr *dumphdr; extern int dump_conflags; extern char *dumppath; +extern uint8_t dump_crypt_key[DUMP_CRYPT_KEYLEN]; +extern uint8_t dump_crypt_nonce[DUMP_CRYPT_NONCELEN]; extern int dump_timeout; extern 
int dump_timeleft; diff --git a/usr/src/uts/common/sys/elf.h b/usr/src/uts/common/sys/elf.h index 1f290c282c..556a49c60f 100644 --- a/usr/src/uts/common/sys/elf.h +++ b/usr/src/uts/common/sys/elf.h @@ -501,6 +501,11 @@ typedef struct { #define PT_GNU_STACK 0x6474e551 /* Indicates stack executability */ #define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */ +/* + * Linux specific program headers not even used by Linux (!!) + */ +#define PT_PAX_FLAGS 0x65041580 /* PaX flags (see below) */ + #define PT_LOSUNW 0x6ffffffa #define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment (unused) */ #define PT_SUNWSTACK 0x6ffffffb /* describes the stack segment */ @@ -516,6 +521,45 @@ typedef struct { #define PF_W 0x2 #define PF_X 0x1 +/* + * PaX is a regrettable series of never-integrated Linux patches for a + * facility to provide additional protections on memory pages for purposes of + * increasing security, and for allowing binaries to demand (or refuse) those + * protections via the PT_PAX_FLAGS program header. (Portents of its + * rudderless existence, "PaX" is a term of indefinite origin written by an + * unknown group of people.) This facility is unfortunate in any number of + * ways, and was largely obviated by the broad adoption of non-executable + * stacks at any rate -- but it lives on in binaries that continue to mark + * themselves to explicitly refuse the (never-integrated, now-obviated) + * facility. One might cringe that PaX overloads the meaning of the p_flags + * to specify protections, but that is the least of its transgressions: + * instead of using one p_type constant to explicitly enable a series of + * protections and another to explicitly disable others, it insists on + * conflating both actions into PT_PAX_FLAGS. 
The resulting doubling of + * constant definitions (two constant definitions for every protection instead + * of merely one) assures that the values can't even fit in the eight + * PF_MASKOS bits putatively defined to provide a modicum of cleanliness for + * such filthy functionality. And were all of this not enough, there is one + * final nomenclature insult to be added to this semantic injury: the + * constants for the p_flags don't even embed "_PAX_" in their name -- despite + * the fact that this is their only purpose! We resist the temptation to + * right this final wrong here; we grit our teeth and provide exactly the + * Linux definitions -- or rather, what would have been the Linux definitions + * had this belching jalopy ever been permitted to crash itself into mainline. + */ +#define PF_PAGEEXEC 0x00000010 /* PaX: enable PAGEEXEC */ +#define PF_NOPAGEEXEC 0x00000020 /* PaX: disable PAGEEXEC */ +#define PF_SEGMEXEC 0x00000040 /* PaX: enable SEGMEXEC */ +#define PF_NOSEGMEXEC 0x00000080 /* PaX: disable SEGMEXEC */ +#define PF_MPROTECT 0x00000100 /* PaX: enable MPROTECT */ +#define PF_NOMPROTECT 0x00000200 /* PaX: disable MPROTECT */ +#define PF_RANDEXEC 0x00000400 /* PaX: enable RANDEXEC */ +#define PF_NORANDEXEC 0x00000800 /* PaX: disable RANDEXEC */ +#define PF_EMUTRAMP 0x00001000 /* PaX: enable EMUTRAMP */ +#define PF_NOEMUTRAMP 0x00002000 /* PaX: disable EMUTRAMP */ +#define PF_RANDMMAP 0x00004000 /* PaX: enable RANDMMAP */ +#define PF_NORANDMMAP 0x00008000 /* PaX: disable RANDMMAP */ + #define PF_MASKOS 0x0ff00000 /* OS specific values */ #define PF_MASKPROC 0xf0000000 /* processor specific values */ diff --git a/usr/src/uts/common/sys/eventfd.h b/usr/src/uts/common/sys/eventfd.h index 1b0d961b0b..b64a101348 100644 --- a/usr/src/uts/common/sys/eventfd.h +++ b/usr/src/uts/common/sys/eventfd.h @@ -10,7 +10,7 @@ */ /* - * Copyright (c) 2015 Joyent, Inc. All rights reserved. + * Copyright (c) 2017, Joyent, Inc. 
*/ /* @@ -47,6 +47,13 @@ typedef uint64_t eventfd_t; #define EVENTFDIOC (('e' << 24) | ('f' << 16) | ('d' << 8)) #define EVENTFDIOC_SEMAPHORE (EVENTFDIOC | 1) /* toggle sem state */ +/* + * Kernel-internal method to write to eventfd while bypassing overflow limits, + * therefore avoiding potential to block as well. This is used to fulfill AIO + * behavior in LX related to eventfd notification. + */ +#define EVENTFDIOC_POST (EVENTFDIOC | 2) + #ifndef _KERNEL extern int eventfd(unsigned int, int); @@ -58,6 +65,7 @@ extern int eventfd_write(int, eventfd_t); #define EVENTFDMNRN_EVENTFD 0 #define EVENTFDMNRN_CLONE 1 #define EVENTFD_VALMAX (ULLONG_MAX - 1ULL) +#define EVENTFD_VALOVERFLOW ULLONG_MAX #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h index 23eb5b6bf7..0d5b4c4611 100644 --- a/usr/src/uts/common/sys/exec.h +++ b/usr/src/uts/common/sys/exec.h @@ -81,7 +81,8 @@ typedef struct uarg { ssize_t na; ssize_t ne; ssize_t nc; - ssize_t arglen; + size_t argstrlen; + size_t envstrlen; char *fname; char *pathname; size_t auxsize; @@ -107,10 +108,13 @@ typedef struct uarg { vnode_t *ex_vp; char *emulator; char *brandname; + const char *brand_nroot; char *auxp_auxflags; /* addr of auxflags auxv on the user stack */ char *auxp_brand; /* address of first brand auxv on user stack */ cred_t *pfcred; boolean_t scrubenv; + uintptr_t maxstack; + boolean_t stk_prot_override; uintptr_t commpage; } uarg_t; @@ -181,7 +185,7 @@ struct execsw { int (*exec_func)(struct vnode *vp, struct execa *uap, struct uarg *args, struct intpdata *idata, int level, size_t *execsz, int setid, caddr_t exec_file, - struct cred *cred, int brand_action); + struct cred *cred, int *brand_action); int (*exec_core)(struct vnode *vp, struct proc *p, struct cred *cred, rlim64_t rlimit, int sig, core_content_t content); @@ -209,7 +213,7 @@ extern int exec_common(const char *fname, const char **argp, const char **envp, int brand_action); extern int gexec(vnode_t **vp, 
struct execa *uap, struct uarg *args, struct intpdata *idata, int level, size_t *execsz, caddr_t exec_file, - struct cred *cred, int brand_action); + struct cred *cred, int *brand_action); extern struct execsw *allocate_execsw(char *name, char *magic, size_t magic_size); extern struct execsw *findexecsw(char *magic); @@ -234,18 +238,20 @@ extern void exec_set_sp(size_t); * when compiling the 32-bit compatability elf code in the elfexec module. */ extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, - size_t *, int, caddr_t, cred_t *, int); + size_t *, int, caddr_t, cred_t *, int *); extern int mapexec_brand(vnode_t *, uarg_t *, Ehdr *, Addr *, - intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *); + intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *, + uintptr_t *, uintptr_t *); extern int elfreadhdr(vnode_t *, cred_t *, Ehdr *, uint_t *, caddr_t *, size_t *); #endif /* !_ELF32_COMPAT */ #if defined(_LP64) extern int elf32exec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, - size_t *, int, caddr_t, cred_t *, int); + size_t *, int, caddr_t, cred_t *, int *); extern int mapexec32_brand(vnode_t *, uarg_t *, Elf32_Ehdr *, Elf32_Addr *, - intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *); + intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *, + uintptr_t *, uintptr_t *); extern int elf32readhdr(vnode_t *, cred_t *, Elf32_Ehdr *, uint_t *, caddr_t *, size_t *); #endif /* _LP64 */ diff --git a/usr/src/uts/common/sys/file.h b/usr/src/uts/common/sys/file.h index d300b940e2..66620ab7b9 100644 --- a/usr/src/uts/common/sys/file.h +++ b/usr/src/uts/common/sys/file.h @@ -28,6 +28,7 @@ /* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */ /* Copyright 2020 Joyent, Inc. */ +/* Copyright 2021 OmniOS Community Edition (OmniOSce) Association. 
*/ #ifndef _SYS_FILE_H #define _SYS_FILE_H @@ -120,6 +121,15 @@ typedef struct fpollinfo { #define FCLOEXEC 0x800000 /* O_CLOEXEC = 0x800000 */ #define FDIRECTORY 0x1000000 /* O_DIRECTORY = 0x1000000 */ #define FDIRECT 0x2000000 /* O_DIRECT = 0x2000000 */ +/* + * Private interface for lx O_PATH|O_NOFOLLOW emulation for symlinks. + */ +#define __FLXPATH 0x80000000 +/* + * Private interface for lx fstatat(AT_NO_AUTOMOUNT) emulation. + * Since usage is disjoint, the __FLXPATH bit is re-used. + */ +#define __FLXNOAUTO 0x80000000 #if defined(_KERNEL) || defined(_FAKE_KERNEL) @@ -224,6 +234,7 @@ extern void fcnt_add(struct uf_info *, int); extern void close_exec(struct uf_info *); extern void clear_stale_fd(void); extern void clear_active_fd(int); +extern void set_active_fd(int); extern void free_afd(afd_t *afd); extern int fgetstartvp(int, char *, struct vnode **); extern int fsetattrat(int, char *, int, struct vattr *); diff --git a/usr/src/uts/common/sys/frameio.h b/usr/src/uts/common/sys/frameio.h new file mode 100644 index 0000000000..54e6dbeedf --- /dev/null +++ b/usr/src/uts/common/sys/frameio.h @@ -0,0 +1,107 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_FRAMEIO_H +#define _SYS_FRAMEIO_H + +/* + * Frame I/O definitions + */ + +#include <sys/types.h> + +#ifdef _KERNEL +/* Kernel only headers */ +#include <sys/stream.h> +#endif /* _KERNEL */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * An individual frame vector component. Collections of these are used to make + * ioctls. 
+ */ +typedef struct framevec { + void *fv_buf; /* Buffer with data */ + size_t fv_buflen; /* Size of the buffer */ + size_t fv_actlen; /* Amount of buffer consumed, ignore on error */ +} framevec_t; + +/* + * The base unit used with frameio. + */ +typedef struct frameio { + uint_t fio_version; /* Should always be FRAMEIO_CURRENT_VERSION */ + uint_t fio_nvpf; /* How many vectors make up one frame */ + uint_t fio_nvecs; /* The total number of vectors */ + framevec_t fio_vecs[]; /* C99 flexible array member */ +} frameio_t; + + +#define FRAMEIO_VERSION_ONE 1 +#define FRAMEIO_CURRENT_VERSION FRAMEIO_VERSION_ONE + +#define FRAMEIO_NVECS_MAX 32 + +/* + * Definitions for kernel modules to include as helpers. These are consolidation + * private. + */ +#ifdef _KERNEL + +/* + * 32-bit versions for 64-bit kernels + */ +typedef struct framevec32 { + caddr32_t fv_buf; + size32_t fv_buflen; + size32_t fv_actlen; +} framevec32_t; + +typedef struct frameio32 { + uint_t fio_version; + uint_t fio_vecspframe; + uint_t fio_nvecs; + framevec32_t fio_vecs[]; +} frameio32_t; + +/* + * Describe the different ways that vectors should map to frames. 
+ */ +typedef enum frameio_write_mblk_map { + MAP_BLK_FRAME +} frameio_write_mblk_map_t; + +int frameio_init(void); +void frameio_fini(void); +frameio_t *frameio_alloc(int); +void frameio_free(frameio_t *); +int frameio_hdr_copyin(frameio_t *, int, const void *, uint_t); +int frameio_mblk_chain_read(frameio_t *, mblk_t **, int *, int); +int frameio_mblk_chain_write(frameio_t *, frameio_write_mblk_map_t, mblk_t *, + int *, int); +int frameio_hdr_copyout(frameio_t *, int, void *, uint_t); +size_t frameio_frame_length(frameio_t *, framevec_t *); +void frameio_mark_consumed(frameio_t *, int); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FRAMEIO_H */ diff --git a/usr/src/uts/common/sys/fs/fifonode.h b/usr/src/uts/common/sys/fs/fifonode.h index d8b158ce3c..1ea8563e1c 100644 --- a/usr/src/uts/common/sys/fs/fifonode.h +++ b/usr/src/uts/common/sys/fs/fifonode.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2017 Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -83,6 +84,7 @@ struct fifonode { struct msgb *fn_tail; /* last message to read */ fifolock_t *fn_lock; /* pointer to per fifo lock */ uint_t fn_count; /* Number of bytes on fn_mp */ + uint_t fn_hiwat; /* pipe (fifofast) high water */ kcondvar_t fn_wait_cv; /* fifo conditional variable */ ushort_t fn_wcnt; /* number of writers */ ushort_t fn_rcnt; /* number of readers */ @@ -135,6 +137,8 @@ typedef struct fifodata { #define FIFOPOLLRBAND 0x20000 #define FIFOSTAYFAST 0x40000 /* don't turn into stream mode */ #define FIFOWAITMODE 0x80000 /* waiting for the possibility to change mode */ +/* Data on loan, block reads. Use in conjunction with FIFOSTAYFAST. 
*/ +#define FIFORDBLOCK 0x100000 #define FIFOHIWAT (16 * 1024) #define FIFOLOWAT (0) @@ -147,16 +151,6 @@ typedef struct fifodata { #if defined(_KERNEL) -/* - * Fifohiwat defined as a variable is to allow tuning of the high - * water mark if needed. It is not meant to be released. - */ -#if FIFODEBUG -extern int Fifohiwat; -#else /* FIFODEBUG */ -#define Fifohiwat FIFOHIWAT -#endif /* FIFODEBUG */ - extern struct vnodeops *fifo_vnodeops; extern const struct fs_operation_def fifo_vnodeops_template[]; extern struct kmem_cache *fnode_cache; @@ -181,6 +175,8 @@ extern void fifo_fastoff(fifonode_t *); extern struct streamtab *fifo_getinfo(); extern void fifo_wakereader(fifonode_t *, fifolock_t *); extern void fifo_wakewriter(fifonode_t *, fifolock_t *); +extern boolean_t fifo_stayfast_enter(fifonode_t *); +extern void fifo_stayfast_exit(fifonode_t *); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/fs/hyprlofs.h b/usr/src/uts/common/sys/fs/hyprlofs.h new file mode 100644 index 0000000000..b8c4149df2 --- /dev/null +++ b/usr/src/uts/common/sys/fs/hyprlofs.h @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2012, Joyent, Inc. All rights reserved. 
+ */ + +#ifndef _SYS_FS_HYPRLOFS_H +#define _SYS_FS_HYPRLOFS_H + +#include <sys/param.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * hyprlofs ioctl numbers. + */ +#define HYPRLOFS_IOC ('H' << 8) + +#define HYPRLOFS_ADD_ENTRIES (HYPRLOFS_IOC | 1) +#define HYPRLOFS_RM_ENTRIES (HYPRLOFS_IOC | 2) +#define HYPRLOFS_RM_ALL (HYPRLOFS_IOC | 3) +#define HYPRLOFS_GET_ENTRIES (HYPRLOFS_IOC | 4) + +typedef struct { + char *hle_path; + uint_t hle_plen; + char *hle_name; + uint_t hle_nlen; +} hyprlofs_entry_t; + +typedef struct { + hyprlofs_entry_t *hle_entries; + uint_t hle_len; +} hyprlofs_entries_t; + +typedef struct { + char hce_path[MAXPATHLEN]; + char hce_name[MAXPATHLEN]; +} hyprlofs_curr_entry_t; + +typedef struct { + hyprlofs_curr_entry_t *hce_entries; + uint_t hce_cnt; +} hyprlofs_curr_entries_t; + +#ifdef _KERNEL +typedef struct { + caddr32_t hle_path; + uint_t hle_plen; + caddr32_t hle_name; + uint_t hle_nlen; +} hyprlofs_entry32_t; + +typedef struct { + caddr32_t hle_entries; + uint_t hle_len; +} hyprlofs_entries32_t; + +typedef struct { + caddr32_t hce_entries; + uint_t hce_cnt; +} hyprlofs_curr_entries32_t; + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_HYPRLOFS_H */ diff --git a/usr/src/uts/common/sys/fs/hyprlofs_info.h b/usr/src/uts/common/sys/fs/hyprlofs_info.h new file mode 100644 index 0000000000..38389f77d9 --- /dev/null +++ b/usr/src/uts/common/sys/fs/hyprlofs_info.h @@ -0,0 +1,174 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_FS_HYPRLOFS_INFO_H +#define _SYS_FS_HYPRLOFS_INFO_H + +#include <sys/t_lock.h> +#include <vm/seg.h> +#include <vm/seg_vn.h> +#include <sys/vfs_opreg.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * hlnode is the file system dependent node for hyprlofs. + * It is modeled on the tmpfs tmpnode. + * + * hln_rwlock protects access of the directory list at hln_dir + * as well as synchronizing read/writes to directory hlnodes. + * hln_tlock protects updates to hln_mode and hln_nlink. + * hln_tlock doesn't require any hlnode locks. + */ +typedef struct hlnode { + struct hlnode *hln_back; /* linked list of hlnodes */ + struct hlnode *hln_forw; /* linked list of hlnodes */ + union { + struct { + struct hldirent *un_dirlist; /* dirent list */ + uint_t un_dirents; /* number of dirents */ + } un_dirstruct; + vnode_t *un_realvp; /* real vnode */ + } un_hlnode; + vnode_t *hln_vnode; /* vnode for this hlnode */ + int hln_gen; /* pseudo gen num for hlfid */ + int hln_looped; /* flag indicating loopback */ + vattr_t hln_attr; /* attributes */ + krwlock_t hln_rwlock; /* rw - serialize mods and */ + /* directory updates */ + kmutex_t hln_tlock; /* time, flag, and nlink lock */ +} hlnode_t; + +/* + * hyprlofs per-mount data structure. + * All fields are protected by hlm_contents. 
+ */ +typedef struct { + vfs_t *hlm_vfsp; /* filesystem's vfs struct */ + hlnode_t *hlm_rootnode; /* root hlnode */ + char *hlm_mntpath; /* name of hyprlofs mount point */ + dev_t hlm_dev; /* unique dev # of mounted `device' */ + uint_t hlm_gen; /* pseudo generation number for files */ + kmutex_t hlm_contents; /* lock for hlfsmount structure */ +} hlfsmount_t; + +/* + * hyprlofs directories are made up of a linked list of hldirent structures + * hanging off directory hlnodes. File names are not fixed length, + * but are null terminated. + */ +typedef struct hldirent { + hlnode_t *hld_hlnode; /* hlnode for this file */ + struct hldirent *hld_next; /* next directory entry */ + struct hldirent *hld_prev; /* prev directory entry */ + uint_t hld_offset; /* "offset" of dir entry */ + uint_t hld_hash; /* a hash of hld_name */ + struct hldirent *hld_link; /* linked via the hash table */ + hlnode_t *hld_parent; /* parent, dir we are in */ + char *hld_name; /* must be null terminated */ + /* max length is MAXNAMELEN */ +} hldirent_t; + +/* + * hlfid overlays the fid structure (for VFS_VGET) + */ +typedef struct { + uint16_t hlfid_len; + ino32_t hlfid_ino; + int32_t hlfid_gen; +} hlfid_t; + +/* + * File system independent to hyprlofs conversion macros + */ +#define VFSTOHLM(vfsp) ((hlfsmount_t *)(vfsp)->vfs_data) +#define VTOHLM(vp) ((hlfsmount_t *)(vp)->v_vfsp->vfs_data) +#define VTOHLN(vp) ((hlnode_t *)(vp)->v_data) +#define HLNTOV(tp) ((tp)->hln_vnode) +#define REALVP(vp) ((vnode_t *)VTOHLN(vp)->hln_realvp) +#define hlnode_hold(tp) VN_HOLD(HLNTOV(tp)) +#define hlnode_rele(tp) VN_RELE(HLNTOV(tp)) + +#define hln_dir un_hlnode.un_dirstruct.un_dirlist +#define hln_dirents un_hlnode.un_dirstruct.un_dirents +#define hln_realvp un_hlnode.un_realvp + +/* + * Attributes + */ +#define hln_mask hln_attr.va_mask +#define hln_type hln_attr.va_type +#define hln_mode hln_attr.va_mode +#define hln_uid hln_attr.va_uid +#define hln_gid hln_attr.va_gid +#define hln_fsid hln_attr.va_fsid 
+#define hln_nodeid hln_attr.va_nodeid +#define hln_nlink hln_attr.va_nlink +#define hln_size hln_attr.va_size +#define hln_atime hln_attr.va_atime +#define hln_mtime hln_attr.va_mtime +#define hln_ctime hln_attr.va_ctime +#define hln_rdev hln_attr.va_rdev +#define hln_blksize hln_attr.va_blksize +#define hln_nblocks hln_attr.va_nblocks +#define hln_seq hln_attr.va_seq + +/* + * enums + */ +enum de_op { DE_CREATE, DE_MKDIR }; /* direnter ops */ +enum dr_op { DR_REMOVE, DR_RMDIR }; /* dirremove ops */ + +/* + * hyprlofs_minfree is the amount (in pages) of anonymous memory that hyprlofs + * leaves free for the rest of the system. The default value for + * hyprlofs_minfree is btopr(HYPRLOFSMINFREE) but it can be patched to a + * different number of pages. Since hyprlofs doesn't actually use much + * memory, it's unlikely this ever needs to be patched. + */ +#define HYPRLOFSMINFREE 8 * 1024 * 1024 /* 8 Megabytes */ + +extern size_t hyprlofs_minfree; /* Anonymous memory in pages */ + +extern void hyprlofs_node_init(hlfsmount_t *, hlnode_t *, vattr_t *, + cred_t *); +extern int hyprlofs_dirlookup(hlnode_t *, char *, hlnode_t **, cred_t *); +extern int hyprlofs_dirdelete(hlnode_t *, hlnode_t *, char *, enum dr_op, + cred_t *); +extern void hyprlofs_dirinit(hlnode_t *, hlnode_t *); +extern void hyprlofs_dirtrunc(hlnode_t *); +extern int hyprlofs_taccess(void *, int, cred_t *); +extern int hyprlofs_direnter(hlfsmount_t *, hlnode_t *, char *, enum de_op, + vnode_t *, vattr_t *, hlnode_t **, cred_t *); + +extern struct vnodeops *hyprlofs_vnodeops; +extern const struct fs_operation_def hyprlofs_vnodeops_template[]; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_HYPRLOFS_INFO_H */ diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h index dc6601bb43..676193fcfa 100644 --- a/usr/src/uts/common/sys/fs/sdev_impl.h +++ b/usr/src/uts/common/sys/fs/sdev_impl.h @@ -37,6 +37,7 @@ extern "C" { #include <sys/vfs_opreg.h> #include <sys/list.h> 
#include <sys/nvpair.h> +#include <sys/fs/sdev_plugin.h> #include <sys/sunddi.h> #include <sys/fs/sdev_plugin.h> diff --git a/usr/src/uts/common/sys/fs/tmp.h b/usr/src/uts/common/sys/fs/tmp.h index fb07de6588..f4cee09244 100644 --- a/usr/src/uts/common/sys/fs/tmp.h +++ b/usr/src/uts/common/sys/fs/tmp.h @@ -23,7 +23,7 @@ * All rights reserved. Use is subject to license terms. */ /* - * Copyright 2015 Joyent, Inc. + * Copyright 2016 Joyent, Inc. */ #ifndef _SYS_FS_TMP_H @@ -43,8 +43,10 @@ struct tmount { struct vfs *tm_vfsp; /* filesystem's vfs struct */ struct tmpnode *tm_rootnode; /* root tmpnode */ char *tm_mntpath; /* name of tmpfs mount point */ - ulong_t tm_anonmax; /* file system max anon reservation */ - pgcnt_t tm_anonmem; /* pages of reserved anon memory */ + size_t tm_anonmax; /* file system max anon reservation */ + size_t tm_anonmem; /* bytes of reserved anon memory */ + /* and allocated kmem for the fs */ + size_t tm_allocmem; /* bytes alloced from tmp_kmem_ funcs */ dev_t tm_dev; /* unique dev # of mounted `device' */ uint_t tm_gen; /* pseudo generation number for files */ kmutex_t tm_contents; /* lock for tmount structure */ @@ -58,6 +60,7 @@ struct tmount { #define VTOTM(vp) ((struct tmount *)(vp)->v_vfsp->vfs_data) #define VTOTN(vp) ((struct tmpnode *)(vp)->v_data) #define TNTOV(tp) ((tp)->tn_vnode) +#define TNTOTM(tp) (VTOTM(TNTOV(tp))) #define tmpnode_hold(tp) VN_HOLD(TNTOV(tp)) #define tmpnode_rele(tp) VN_RELE(TNTOV(tp)) @@ -69,41 +72,39 @@ enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; /* dirremove ops */ /* * tmpfs_minfree is the amount (in pages) of anonymous memory that tmpfs - * leaves free for the rest of the system. E.g. in a system with 32MB of - * configured swap space, if 16MB were reserved (leaving 16MB free), - * tmpfs could allocate up to 16MB - tmpfs_minfree. The default value - * for tmpfs_minfree is btopr(TMPMINFREE) but it can cautiously patched - * to a different number of pages. 
- * NB: If tmpfs allocates too much swap space, other processes will be - * unable to execute. + * leaves free for the rest of the system. In antiquity, this number could be + * relevant on a system-wide basis, as physical DRAM was routinely exhausted; + * however, in more modern times, the relative growth of DRAM with respect to + * application footprint means that this number is only likely to become a + * factor in a virtualized OS environment (e.g., a zone) -- and even then only + * when DRAM and swap have both been capped low to allow for maximum tenancy. + * TMPMINFREE -- the value from which tmpfs_minfree is derived -- should + * therefore be configured to a value that is roughly the smallest practical + * value for memory + swap minus the largest reasonable size for tmpfs in such + * a configuration. As of this writing, the smallest practical memory + swap + * configuration is 128MB, and it seems reasonable to allow tmpfs to consume + * no more than seven-eighths of this, yielding a TMPMINFREE of 16MB. Care + * should be exercised in changing this: tuning this value too high will + * result in spurious ENOSPC errors in tmpfs in small zones (a problem that + * can induce cascading failure surprisingly often); tuning this value too low + * will result in tmpfs consumption alone inducing application-level + * memory allocation failure. */ -#define TMPMINFREE 2 * 1024 * 1024 /* 2 Megabytes */ +#define TMPMINFREE 16 * 1024 * 1024 /* 16 Megabytes */ extern size_t tmpfs_minfree; /* Anonymous memory in pages */ -/* - * tmpfs can allocate only a certain percentage of kernel memory, - * which is used for tmpnodes, directories, file names, etc. - * This is statically set as TMPMAXFRACKMEM of physical memory. - * The actual number of allocatable bytes can be patched in tmpfs_maxkmem. 
- */ -#define TMPMAXFRACKMEM 25 /* 1/25 of physical memory */ - -extern size_t tmp_kmemspace; -extern size_t tmpfs_maxkmem; /* Allocatable kernel memory in bytes */ - extern void tmpnode_init(struct tmount *, struct tmpnode *, struct vattr *, struct cred *); +extern void tmpnode_cleanup(struct tmpnode *tp); extern int tmpnode_trunc(struct tmount *, struct tmpnode *, ulong_t); extern void tmpnode_growmap(struct tmpnode *, ulong_t); extern int tdirlookup(struct tmpnode *, char *, struct tmpnode **, struct cred *); extern int tdirdelete(struct tmpnode *, struct tmpnode *, char *, enum dr_op, struct cred *); -extern void tdirinit(struct tmpnode *, struct tmpnode *); +extern int tdirinit(struct tmpnode *, struct tmpnode *); extern void tdirtrunc(struct tmpnode *); -extern void *tmp_memalloc(size_t, int); -extern void tmp_memfree(void *, size_t); extern int tmp_resv(struct tmount *, struct tmpnode *, size_t, int); extern int tmp_taccess(void *, int, struct cred *); extern int tmp_sticky_remove_access(struct tmpnode *, struct tmpnode *, @@ -114,6 +115,9 @@ extern int tdirenter(struct tmount *, struct tmpnode *, char *, enum de_op, struct tmpnode *, struct tmpnode *, struct vattr *, struct tmpnode **, struct cred *, caller_context_t *); +extern void *tmp_kmem_zalloc(struct tmount *, size_t, int); +extern void tmp_kmem_free(struct tmount *, void *, size_t); + #define TMP_MUSTHAVE 0x01 #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/fss.h b/usr/src/uts/common/sys/fss.h index 6168e9d9ed..87d798d6c1 100644 --- a/usr/src/uts/common/sys/fss.h +++ b/usr/src/uts/common/sys/fss.h @@ -160,6 +160,7 @@ typedef struct fsszone { /* * fss_flags */ +/* Formerly: FSSKPRI 0x01 - the thread is in kernel mode */ #define FSSBACKQ 0x02 /* thread should be placed at the back of */ /* the dispatch queue if preempted */ #define FSSRESTORE 0x04 /* thread was not preempted, due to schedctl */ diff --git a/usr/src/uts/common/sys/fx.h b/usr/src/uts/common/sys/fx.h index 2d4e1aa7fb..4a48af52a1 
100644 --- a/usr/src/uts/common/sys/fx.h +++ b/usr/src/uts/common/sys/fx.h @@ -21,13 +21,12 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_FX_H #define _SYS_FX_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/thread.h> #include <sys/ddi.h> @@ -145,7 +144,14 @@ typedef struct fxkparms { uint_t fx_cflags; } fxkparms_t; +/* + * control flags (kparms->fx_cflags). + */ +#define FX_DOUPRILIM 0x01 /* change user priority limit */ +#define FX_DOUPRI 0x02 /* change user priority */ +#define FX_DOTQ 0x04 /* change FX time quantum */ +#define FXMAXUPRI 60 /* maximum user priority setting */ /* * Interface for partner private code. This is not a public interface. diff --git a/usr/src/uts/common/sys/gsqueue.h b/usr/src/uts/common/sys/gsqueue.h new file mode 100644 index 0000000000..91ab46fc44 --- /dev/null +++ b/usr/src/uts/common/sys/gsqueue.h @@ -0,0 +1,59 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2017 Joyent, Inc. + */ + +#ifndef _SYS_GSQUEUE_H +#define _SYS_GSQUEUE_H + +/* + * Standard interfaces to serialization queues for everyone (except IP). 
+ */ + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +typedef struct gsqueue gsqueue_t; +typedef struct gsqueue_set gsqueue_set_t; + +typedef void (*gsqueue_cb_f)(gsqueue_set_t *, gsqueue_t *, void *, boolean_t); +typedef void (*gsqueue_proc_f)(void *, mblk_t *, gsqueue_t *, void *); + +extern gsqueue_set_t *gsqueue_set_create(pri_t); +extern void gsqueue_set_destroy(gsqueue_set_t *); +extern gsqueue_t *gsqueue_set_get(gsqueue_set_t *, uint_t); + +extern uintptr_t gsqueue_set_cb_add(gsqueue_set_t *, gsqueue_cb_f, void *); +extern int gsqueue_set_cb_remove(gsqueue_set_t *, uintptr_t); + +#define GSQUEUE_FILL 0x0001 +#define GSQUEUE_NODRAIN 0x0002 +#define GSQUEUE_PROCESS 0x0004 + +extern void gsqueue_enter_one(gsqueue_t *, mblk_t *, gsqueue_proc_f, void *, + int, uint8_t); + +#define GSQUEUE_DEFAULT_PRIORITY MAXCLSYSPRI + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_GSQUEUE_H */ diff --git a/usr/src/uts/common/sys/ia.h b/usr/src/uts/common/sys/ia.h index 02dc29aaec..567c121bb0 100644 --- a/usr/src/uts/common/sys/ia.h +++ b/usr/src/uts/common/sys/ia.h @@ -86,6 +86,7 @@ typedef struct iaproc { /* flags */ +/* Formerly: IAKPRI 0x01 - thread at kernel model priority */ #define IABACKQ 0x02 /* thread goes to back of disp q when preempted */ #define IASLEPT 0x04 /* thread had long-term suspend - give new slice */ diff --git a/usr/src/uts/common/sys/inotify.h b/usr/src/uts/common/sys/inotify.h new file mode 100644 index 0000000000..8acc1a7280 --- /dev/null +++ b/usr/src/uts/common/sys/inotify.h @@ -0,0 +1,153 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +/* + * Header file to support the inotify facility. Note that this facility + * is designed to be binary compatible with the Linux inotify facility; values + * for constants here should therefore exactly match those found in Linux, and + * this facility shouldn't be extended independently of Linux. + */ + +#ifndef _SYS_INOTIFY_H +#define _SYS_INOTIFY_H + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Events that can be explicitly requested on any inotify watch. + */ +#define IN_ACCESS 0x00000001 +#define IN_MODIFY 0x00000002 +#define IN_ATTRIB 0x00000004 +#define IN_CLOSE_WRITE 0x00000008 +#define IN_CLOSE_NOWRITE 0x00000010 +#define IN_OPEN 0x00000020 +#define IN_MOVED_FROM 0x00000040 +#define IN_MOVED_TO 0x00000080 +#define IN_CREATE 0x00000100 +#define IN_DELETE 0x00000200 +#define IN_DELETE_SELF 0x00000400 +#define IN_MOVE_SELF 0x00000800 + +/* + * Events that can be sent to an inotify watch -- requested or not. + */ +#define IN_UNMOUNT 0x00002000 +#define IN_Q_OVERFLOW 0x00004000 +#define IN_IGNORED 0x00008000 + +/* + * Flags that can modify an inotify event. + */ +#define IN_ONLYDIR 0x01000000 +#define IN_DONT_FOLLOW 0x02000000 +#define IN_EXCL_UNLINK 0x04000000 +#define IN_MASK_ADD 0x20000000 +#define IN_ISDIR 0x40000000 +#define IN_ONESHOT 0x80000000 + +/* + * Helpful constants. 
+ */ +#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE) +#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) +#define IN_ALL_EVENTS \ + (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ + IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | IN_MOVED_TO | \ + IN_DELETE | IN_CREATE | IN_DELETE_SELF | IN_MOVE_SELF) + +#define IN_CHILD_EVENTS \ + (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ + IN_CLOSE_NOWRITE | IN_MODIFY | IN_OPEN) + +/* + * To assure binary compatibility with Linux, these values are fixed at their + * Linux equivalents, not their native ones. + */ +#define IN_CLOEXEC 02000000 /* LX_O_CLOEXEC */ +#define IN_NONBLOCK 04000 /* LX_O_NONBLOCK */ + +struct inotify_event { + int32_t wd; /* watch descriptor */ + uint32_t mask; /* mask of events */ + uint32_t cookie; /* event association cookie, if any */ + uint32_t len; /* size of name field */ + char name[]; /* optional NUL-terminated name */ +}; + +/* + * These ioctl values are specific to the native implementation; applications + * shouldn't be using them directly, and they should therefore be safe to + * change without breaking apps. 
+ */ +#define INOTIFYIOC (('i' << 24) | ('n' << 16) | ('y' << 8)) +#define INOTIFYIOC_ADD_WATCH (INOTIFYIOC | 1) /* add watch */ +#define INOTIFYIOC_RM_WATCH (INOTIFYIOC | 2) /* remove watch */ +#define INOTIFYIOC_ADD_CHILD (INOTIFYIOC | 3) /* add child watch */ +#define INOTIFYIOC_ACTIVATE (INOTIFYIOC | 4) /* activate watch */ + +#ifndef _LP64 +#ifndef _LITTLE_ENDIAN +#define INOTIFY_PTR(type, name) uint32_t name##pad; type *name +#else +#define INOTIFY_PTR(type, name) type *name; uint32_t name##pad +#endif +#else +#define INOTIFY_PTR(type, name) type *name +#endif + +typedef struct inotify_addwatch { + int inaw_fd; /* open fd for object */ + uint32_t inaw_mask; /* desired mask */ +} inotify_addwatch_t; + +typedef struct inotify_addchild { + INOTIFY_PTR(char, inac_name); /* pointer to name */ + int inac_fd; /* open fd for parent */ +} inotify_addchild_t; + +#ifndef _KERNEL + +extern int inotify_init(void); +extern int inotify_init1(int); +extern int inotify_add_watch(int, const char *, uint32_t); +extern int inotify_rm_watch(int, int); + +#else + +#define IN_UNMASKABLE \ + (IN_UNMOUNT | IN_Q_OVERFLOW | IN_IGNORED | IN_ISDIR) + +#define IN_MODIFIERS \ + (IN_EXCL_UNLINK | IN_ONESHOT) + +#define IN_FLAGS \ + (IN_ONLYDIR | IN_DONT_FOLLOW | IN_MASK_ADD) + +#define IN_REMOVAL (1ULL << 32) +#define INOTIFYMNRN_INOTIFY 0 +#define INOTIFYMNRN_CLONE 1 + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_INOTIFY_H */ diff --git a/usr/src/uts/common/sys/ipc_impl.h b/usr/src/uts/common/sys/ipc_impl.h index 0569c3e967..d7dc365c09 100644 --- a/usr/src/uts/common/sys/ipc_impl.h +++ b/usr/src/uts/common/sys/ipc_impl.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2016, Joyent, Inc. 
*/ #ifndef _IPC_IMPL_H @@ -226,6 +227,7 @@ int ipc_commit_begin(ipc_service_t *, key_t, int, kipc_perm_t *); kmutex_t *ipc_commit_end(ipc_service_t *, kipc_perm_t *); void ipc_cleanup(ipc_service_t *, kipc_perm_t *); +void ipc_rmsvc(ipc_service_t *, kipc_perm_t *); int ipc_rmid(ipc_service_t *, int, cred_t *); int ipc_ids(ipc_service_t *, int *, uint_t, uint_t *); diff --git a/usr/src/uts/common/sys/ipd.h b/usr/src/uts/common/sys/ipd.h index bad74f8b81..f21c3fb5af 100644 --- a/usr/src/uts/common/sys/ipd.h +++ b/usr/src/uts/common/sys/ipd.h @@ -20,7 +20,7 @@ */ /* - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2018, Joyent, Inc. All rights reserved. */ /* @@ -35,7 +35,7 @@ extern "C" { #endif #define IPD_DEV_PATH "/dev/ipd" -#define IPD_MAX_DELAY 10000 /* 10 ms in us */ +#define IPD_MAX_DELAY 1000000 /* 1 second in microseconds */ typedef struct ipd_ioc_perturb { zoneid_t ipip_zoneid; diff --git a/usr/src/uts/common/sys/iso/signal_iso.h b/usr/src/uts/common/sys/iso/signal_iso.h index bf89ef0d33..0a76ee19a7 100644 --- a/usr/src/uts/common/sys/iso/signal_iso.h +++ b/usr/src/uts/common/sys/iso/signal_iso.h @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015, Joyent, Inc. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -95,7 +96,7 @@ extern "C" { /* insert new signals here, and move _SIGRTM* appropriately */ #define _SIGRTMIN 42 /* first (highest-priority) realtime signal */ -#define _SIGRTMAX 73 /* last (lowest-priority) realtime signal */ +#define _SIGRTMAX 74 /* last (lowest-priority) realtime signal */ extern long _sysconf(int); /* System Private interface to sysconf() */ #define SIGRTMIN ((int)_sysconf(_SC_SIGRT_MIN)) /* first realtime signal */ #define SIGRTMAX ((int)_sysconf(_SC_SIGRT_MAX)) /* last realtime signal */ diff --git a/usr/src/uts/common/sys/klwp.h b/usr/src/uts/common/sys/klwp.h index 41b70f6a6e..0ea1a396b9 100644 --- a/usr/src/uts/common/sys/klwp.h +++ b/usr/src/uts/common/sys/klwp.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2016 Joyent, Inc. */ #ifndef _SYS_KLWP_H @@ -191,7 +191,14 @@ typedef struct _klwp { struct ct_template *lwp_ct_active[CTT_MAXTYPE]; /* active templates */ struct contract *lwp_ct_latest[CTT_MAXTYPE]; /* last created contract */ - void *lwp_brand; /* per-lwp brand data */ + /* + * Branding: + * lwp_brand - per-lwp brand data + * lwp_brand_syscall - brand syscall interposer + */ + void *lwp_brand; + int (*lwp_brand_syscall)(void); + struct psinfo *lwp_spymaster; /* if an agent LWP, our spymaster */ } klwp_t; diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h index 1d1915a816..8fff314bfe 100644 --- a/usr/src/uts/common/sys/mac_client.h +++ b/usr/src/uts/common/sys/mac_client.h @@ -116,6 +116,7 @@ typedef enum { #define MAC_PROMISC_FLAGS_NO_PHYS 0x0002 #define MAC_PROMISC_FLAGS_VLAN_TAG_STRIP 0x0004 #define MAC_PROMISC_FLAGS_NO_COPY 0x0008 +#define MAC_PROMISC_FLAGS_DO_FIXUPS 0x0010 /* flags passed to mac_tx() */ #define MAC_DROP_ON_NO_DESC 0x01 /* freemsg() if no tx descs */ diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index 
0e3a6306e0..0f8be50fde 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -83,6 +83,7 @@ typedef struct mac_promisc_impl_s { /* Protected by */ boolean_t mpi_no_phys; /* WO */ boolean_t mpi_strip_vlan_tag; /* WO */ boolean_t mpi_no_copy; /* WO */ + boolean_t mpi_do_fixups; /* WO */ } mac_promisc_impl_t; typedef union mac_tx_percpu_s { diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h index 01cb27644c..97b3fd685a 100644 --- a/usr/src/uts/common/sys/mac_client_priv.h +++ b/usr/src/uts/common/sys/mac_client_priv.h @@ -58,6 +58,9 @@ extern const mac_info_t *mac_info(mac_handle_t); extern boolean_t mac_info_get(const char *, mac_info_t *); extern boolean_t mac_promisc_get(mac_handle_t); +extern boolean_t mac_protect_check_addr(mac_client_handle_t, boolean_t, + in6_addr_t *); + extern int mac_start(mac_handle_t); extern void mac_stop(mac_handle_t); diff --git a/usr/src/uts/common/sys/mac_flow.h b/usr/src/uts/common/sys/mac_flow.h index 04aa8be3f3..a9a2a5f61e 100644 --- a/usr/src/uts/common/sys/mac_flow.h +++ b/usr/src/uts/common/sys/mac_flow.h @@ -22,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2013 Joyent, Inc. All rights reserved. + * Copyright 2017 Joyent, Inc. All rights reserved. * Copyright 2020 RackTop Systems, Inc. 
*/ @@ -156,6 +156,14 @@ typedef enum { #define MPT_MAXIPADDR MPT_MAXCNT #define MPT_MAXCID MPT_MAXCNT #define MPT_MAXCIDLEN 256 +#define MPT_FALSE 0x00000000 +#define MPT_TRUE 0x00000001 + +/* Dynamic address detection types */ +#define MPT_DYN_DHCPV4 0x00000001 +#define MPT_DYN_DHCPV6 0x00000002 +#define MPT_DYN_SLAAC 0x00000004 +#define MPT_DYN_ALL 0x00000007 typedef struct mac_ipaddr_s { uint32_t ip_version; @@ -176,11 +184,13 @@ typedef struct mac_dhcpcid_s { } mac_dhcpcid_t; typedef struct mac_protect_s { - uint32_t mp_types; - uint32_t mp_ipaddrcnt; - mac_ipaddr_t mp_ipaddrs[MPT_MAXIPADDR]; - uint32_t mp_cidcnt; - mac_dhcpcid_t mp_cids[MPT_MAXCID]; + uint32_t mp_types; /* Enabled protection types */ + uint32_t mp_ipaddrcnt; /* Count of allowed IPs */ + mac_ipaddr_t mp_ipaddrs[MPT_MAXIPADDR]; /* Allowed IPs */ + uint32_t mp_cidcnt; /* Count of allowed DHCP CIDs */ + mac_dhcpcid_t mp_cids[MPT_MAXCID]; /* Allowed DHCP CIDs */ + uint32_t mp_allcids; /* Whether to allow all CIDs through */ + uint32_t mp_dynamic; /* Enabled dynamic address methods */ } mac_protect_t; /* The default priority for links */ diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h index 11fa46e571..6906cb3dbf 100644 --- a/usr/src/uts/common/sys/mman.h +++ b/usr/src/uts/common/sys/mman.h @@ -337,6 +337,7 @@ struct memcntl_mha32 { #define MS_SYNC 0x4 /* wait for msync */ #define MS_ASYNC 0x1 /* return immediately */ #define MS_INVALIDATE 0x2 /* invalidate caches */ +#define MS_INVALCURPROC 0x8 /* invalidate cache for curproc only */ #if !defined(_STRICT_POSIX) || (_POSIX_C_SOURCE > 2) || defined(_XPG5) /* flags to mlockall */ diff --git a/usr/src/uts/common/sys/mntent.h b/usr/src/uts/common/sys/mntent.h index 88c98dc5a4..7196f7b3ac 100644 --- a/usr/src/uts/common/sys/mntent.h +++ b/usr/src/uts/common/sys/mntent.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012, Joyent, Inc. 
All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T @@ -47,6 +48,7 @@ extern "C" { #define MNTTYPE_PCFS "pcfs" /* PC (MSDOS) file system */ #define MNTTYPE_PC MNTTYPE_PCFS /* Deprecated name; use MNTTYPE_PCFS */ #define MNTTYPE_LOFS "lofs" /* Loop back file system */ +#define MNTTYPE_HYPRLOFS "hyprlofs" /* Hyperlofs file system */ #define MNTTYPE_LO MNTTYPE_LOFS /* Deprecated name; use MNTTYPE_LOFS */ #define MNTTYPE_HSFS "hsfs" /* High Sierra (9660) file system */ #define MNTTYPE_SWAP "swap" /* Swap file system */ diff --git a/usr/src/uts/common/sys/netconfig.h b/usr/src/uts/common/sys/netconfig.h index 6407534a3b..658f9f3f6b 100644 --- a/usr/src/uts/common/sys/netconfig.h +++ b/usr/src/uts/common/sys/netconfig.h @@ -28,6 +28,7 @@ * * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_NETCONFIG_H diff --git a/usr/src/uts/common/sys/neti.h b/usr/src/uts/common/sys/neti.h index e7027f8ece..92bd5b897d 100644 --- a/usr/src/uts/common/sys/neti.h +++ b/usr/src/uts/common/sys/neti.h @@ -48,6 +48,8 @@ struct msgb; /* avoiding sys/stream.h here */ #define NHF_INET "NHF_INET" #define NHF_INET6 "NHF_INET6" #define NHF_ARP "NHF_ARP" +#define NHF_VND_INET "NHF_VND_INET" +#define NHF_VND_INET6 "NHF_VND_INET6" #define NHF_VIONA "NHF_VIONA" /* diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h index 7ee33318cd..b327e69fad 100644 --- a/usr/src/uts/common/sys/netstack.h +++ b/usr/src/uts/common/sys/netstack.h @@ -88,7 +88,8 @@ typedef id_t netstackid_t; #define NS_IPSECESP 16 #define NS_IPNET 17 #define NS_ILB 18 -#define NS_MAX (NS_ILB+1) +#define NS_VND 19 +#define NS_MAX (NS_VND+1) /* * State maintained for each module which tracks the state of diff --git a/usr/src/uts/common/sys/param.h b/usr/src/uts/common/sys/param.h index 282d84b912..66bd91f76f 100644 --- 
a/usr/src/uts/common/sys/param.h +++ b/usr/src/uts/common/sys/param.h @@ -116,7 +116,7 @@ extern "C" { #define DEFAULT_MAXPID 999999 #define DEFAULT_JUMPPID 100000 #else -#define DEFAULT_MAXPID 30000 +#define DEFAULT_MAXPID 99999 #define DEFAULT_JUMPPID 0 #endif diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h index d8983a28c4..9f1b80d390 100644 --- a/usr/src/uts/common/sys/policy.h +++ b/usr/src/uts/common/sys/policy.h @@ -108,6 +108,7 @@ int secpolicy_ipc_owner(const cred_t *, const struct kipc_perm *); int secpolicy_kmdb(const cred_t *); int secpolicy_lock_memory(const cred_t *); int secpolicy_meminfo(const cred_t *); +int secpolicy_fs_import(const cred_t *); int secpolicy_modctl(const cred_t *, int); int secpolicy_net(const cred_t *, int, boolean_t); int secpolicy_net_bindmlp(const cred_t *); @@ -176,6 +177,7 @@ int secpolicy_setid_setsticky_clear(vnode_t *, vattr_t *, const vattr_t *, cred_t *); int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t); int secpolicy_xvm_control(const cred_t *); +int secpolicy_hyprlofs_control(const cred_t *); int secpolicy_basic_exec(const cred_t *, vnode_t *); int secpolicy_basic_fork(const cred_t *); diff --git a/usr/src/uts/common/sys/poll_impl.h b/usr/src/uts/common/sys/poll_impl.h index ff277f89c8..388849a14f 100644 --- a/usr/src/uts/common/sys/poll_impl.h +++ b/usr/src/uts/common/sys/poll_impl.h @@ -141,6 +141,7 @@ struct pollstate { pollstate_t *ps_contend_nextp; /* next in contender list */ pollstate_t **ps_contend_pnextp; /* pointer-to-previous-next */ int ps_flags; /* state flags */ + short ps_implicit_ev; /* implicit poll event interest */ }; /* pollstate flags */ diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h index 06e5a8caf4..d05886d1fc 100644 --- a/usr/src/uts/common/sys/proc.h +++ b/usr/src/uts/common/sys/proc.h @@ -357,6 +357,7 @@ typedef struct proc { struct zone *p_zone; /* zone in which process lives */ struct vnode *p_execdir; /* directory that 
p_exec came from */ struct brand *p_brand; /* process's brand */ + void *p_brand_data; /* per-process brand state */ psecflags_t p_secflags; /* per-process security flags */ @@ -373,7 +374,6 @@ typedef struct proc { */ struct user p_user; /* (see sys/user.h) */ } proc_t; - #define PROC_T /* headers relying on proc_t are OK */ #ifdef _KERNEL @@ -647,6 +647,7 @@ extern int signal_is_blocked(kthread_t *, int); extern int sigcheck(proc_t *, kthread_t *); extern void sigdefault(proc_t *); +extern struct pid *pid_find(pid_t pid); extern void pid_setmin(void); extern pid_t pid_allocate(proc_t *, pid_t, int); extern int pid_rele(struct pid *); @@ -662,6 +663,7 @@ extern int sprtrylock_proc(proc_t *); extern void sprwaitlock_proc(proc_t *); extern void sprlock_proc(proc_t *); extern void sprunlock(proc_t *); +extern void sprunprlock(proc_t *); extern void pid_init(void); extern proc_t *pid_entry(int); extern int pid_slot(proc_t *); @@ -753,6 +755,10 @@ extern kthread_t *thread_unpin(void); extern void thread_init(void); extern void thread_load(kthread_t *, void (*)(), caddr_t, size_t); +extern void thread_splitstack(void (*)(void *), void *, size_t); +extern void thread_splitstack_run(caddr_t, void (*)(void *), void *); +extern void thread_splitstack_cleanup(void); + extern void tsd_create(uint_t *, void (*)(void *)); extern void tsd_destroy(uint_t *); extern void *tsd_getcreate(uint_t *, void (*)(void *), void *(*)(void)); @@ -794,7 +800,7 @@ extern void pokelwps(proc_t *); extern void continuelwps(proc_t *); extern int exitlwps(int); extern void lwp_ctmpl_copy(klwp_t *, klwp_t *); -extern void lwp_ctmpl_clear(klwp_t *); +extern void lwp_ctmpl_clear(klwp_t *, boolean_t); extern klwp_t *forklwp(klwp_t *, proc_t *, id_t); extern void lwp_load(klwp_t *, gregset_t, uintptr_t); extern void lwp_setrval(klwp_t *, int, int); diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h index 00ba23594e..3d6760a7b4 100644 --- a/usr/src/uts/common/sys/procfs.h +++ 
b/usr/src/uts/common/sys/procfs.h @@ -25,7 +25,7 @@ */ /* * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. - * Copyright 2018 Joyent, Inc. + * Copyright 2019 Joyent, Inc. * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ @@ -237,6 +237,7 @@ typedef struct pstatus { #define PR_FAULTED 6 #define PR_SUSPENDED 7 #define PR_CHECKPOINT 8 +#define PR_BRAND 9 /* * lwp ps(1) information file. /proc/<pid>/lwp/<lwpid>/lwpsinfo @@ -271,10 +272,12 @@ typedef struct lwpsinfo { int pr_filler[4]; /* reserved for future use */ } lwpsinfo_t; +#define PRARGSZ 80 /* number of chars of arguments */ +#define PRMAXARGVLEN 4096 /* max len of /proc/%s/argv */ + /* * process ps(1) information file. /proc/<pid>/psinfo */ -#define PRARGSZ 80 /* number of chars of arguments */ typedef struct psinfo { int pr_flag; /* process flags (DEPRECATED; do not use) */ int pr_nlwp; /* number of active lwps in the process */ diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h index 23594fdc13..52d69b3416 100644 --- a/usr/src/uts/common/sys/ptms.h +++ b/usr/src/uts/common/sys/ptms.h @@ -125,6 +125,12 @@ extern void ptms_logp(char *, uintptr_t); #define DDBGP(a, b) #endif +typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t; +typedef struct ptmptsopencb { + boolean_t (*ppocb_func)(ptmptsopencb_arg_t); + ptmptsopencb_arg_t ppocb_arg; +} ptmptsopencb_t; + #endif /* _KERNEL */ typedef struct pt_own { @@ -160,6 +166,19 @@ typedef struct pt_own { #define ZONEPT (('P'<<8)|4) /* set zone of manager/subsidiary pair */ #define OWNERPT (('P'<<8)|5) /* set owner/group for subsidiary */ +#ifdef _KERNEL +/* + * kernel ioctl commands + * + * PTMPTSOPENCB: Returns a callback function pointer and opaque argument. + * The return value of the callback function when it's invoked + * with the opaque argument passed to it will indicate if the + * pts slave device is currently open. 
+ */ +#define PTMPTSOPENCB (('P'<<8)|6) /* check if the slave is open */ + +#endif /* _KERNEL */ + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/refhash.h b/usr/src/uts/common/sys/refhash.h index b7427a454d..469cb6d686 100644 --- a/usr/src/uts/common/sys/refhash.h +++ b/usr/src/uts/common/sys/refhash.h @@ -19,6 +19,10 @@ #include <sys/types.h> #include <sys/list.h> +#ifdef __cplusplus +extern "C" { +#endif + #define RHL_F_DEAD 0x01 typedef struct refhash_link { @@ -58,4 +62,8 @@ extern void *refhash_first(refhash_t *); extern void *refhash_next(refhash_t *, void *); extern boolean_t refhash_obj_valid(refhash_t *hp, const void *); +#ifdef __cplusplus +} +#endif + #endif /* _SYS_REFHASH_H */ diff --git a/usr/src/uts/common/sys/resource.h b/usr/src/uts/common/sys/resource.h index 13166f378d..d65ca00f69 100644 --- a/usr/src/uts/common/sys/resource.h +++ b/usr/src/uts/common/sys/resource.h @@ -23,6 +23,7 @@ * * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2017 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ diff --git a/usr/src/uts/common/sys/rt.h b/usr/src/uts/common/sys/rt.h index d4233aecb5..2ed7320a09 100644 --- a/usr/src/uts/common/sys/rt.h +++ b/usr/src/uts/common/sys/rt.h @@ -22,6 +22,7 @@ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -75,6 +76,16 @@ typedef struct rtkparms { int rt_tqsig; /* real-time time quantum signal */ uint_t rt_cflags; /* real-time control flags */ } rtkparms_t; + +#define RTGPPRIO0 100 /* Global priority for RT priority 0 */ + +/* + * control flags (kparms->rt_cflags). 
+ */ +#define RT_DOPRI 0x01 /* change priority */ +#define RT_DOTQ 0x02 /* change RT time quantum */ +#define RT_DOSIG 0x04 /* change RT time quantum signal */ + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mpi/mpi2_pci.h b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mpi/mpi2_pci.h new file mode 100644 index 0000000000..afb7a94c58 --- /dev/null +++ b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mpi/mpi2_pci.h @@ -0,0 +1,147 @@ +/*- + * Copyright (c) 2012-2015 LSI Corp. + * Copyright (c) 2013-2016 Avago Technologies + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 2000-2015 LSI Corporation. + * Copyright (c) 2013-2016 Avago Technologies + * All rights reserved. + * + * + * Name: mpi2_pci.h + * Title: MPI PCIe Attached Devices structures and definitions. + * Creation Date: October 9, 2012 + * + * mpi2_pci.h Version: 02.00.02 + * + * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25 + * prefix are for use only on MPI v2.5 products, and must not be used + * with MPI v2.0 products. Unless otherwise noted, names beginning with + * MPI2 or Mpi2 are for use with both MPI v2.0 and MPI v2.5 products. + * + * Version History + * --------------- + * + * Date Version Description + * -------- -------- ------------------------------------------------------ + * 03-16-15 02.00.00 Initial version. + * 02-17-16 02.00.01 Removed AHCI support. + * Removed SOP support. + * 07-01-16 02.00.02 Added MPI26_NVME_FLAGS_FORCE_ADMIN_ERR_RESP to + * NVME Encapsulated Request. + * -------------------------------------------------------------------------- + */ + +#ifndef MPI2_PCI_H +#define MPI2_PCI_H + + +/* + * Values for the PCIe DeviceInfo field used in PCIe Device Status Change Event + * data and PCIe Configuration pages. 
+ */ +#define MPI26_PCIE_DEVINFO_DIRECT_ATTACH (0x00000010) + +#define MPI26_PCIE_DEVINFO_MASK_DEVICE_TYPE (0x0000000F) +#define MPI26_PCIE_DEVINFO_NO_DEVICE (0x00000000) +#define MPI26_PCIE_DEVINFO_PCI_SWITCH (0x00000001) +#define MPI26_PCIE_DEVINFO_NVME (0x00000003) + + +/**************************************************************************** +* NVMe Encapsulated message +****************************************************************************/ + +/* NVME Encapsulated Request Message */ +typedef struct _MPI26_NVME_ENCAPSULATED_REQUEST +{ + U16 DevHandle; /* 0x00 */ + U8 ChainOffset; /* 0x02 */ + U8 Function; /* 0x03 */ + U16 EncapsulatedCommandLength; /* 0x04 */ + U8 Reserved1; /* 0x06 */ + U8 MsgFlags; /* 0x07 */ + U8 VP_ID; /* 0x08 */ + U8 VF_ID; /* 0x09 */ + U16 Reserved2; /* 0x0A */ + U32 Reserved3; /* 0x0C */ + U64 ErrorResponseBaseAddress; /* 0x10 */ + U16 ErrorResponseAllocationLength; /* 0x18 */ + U16 Flags; /* 0x1A */ + U32 DataLength; /* 0x1C */ + U8 NVMe_Command[4]; /* 0x20 */ /* variable length */ + +} MPI26_NVME_ENCAPSULATED_REQUEST, MPI2_POINTER PTR_MPI26_NVME_ENCAPSULATED_REQUEST, + Mpi26NVMeEncapsulatedRequest_t, MPI2_POINTER pMpi26NVMeEncapsulatedRequest_t; + +/* defines for the Flags field */ +#define MPI26_NVME_FLAGS_FORCE_ADMIN_ERR_RESP (0x0020) +/* Submission Queue Type*/ +#define MPI26_NVME_FLAGS_SUBMISSIONQ_MASK (0x0010) +#define MPI26_NVME_FLAGS_SUBMISSIONQ_IO (0x0000) +#define MPI26_NVME_FLAGS_SUBMISSIONQ_ADMIN (0x0010) +/* Error Response Address Space */ +#define MPI26_NVME_FLAGS_MASK_ERROR_RSP_ADDR (0x000C) +#define MPI26_NVME_FLAGS_SYSTEM_RSP_ADDR (0x0000) +#define MPI26_NVME_FLAGS_IOCPLB_RSP_ADDR (0x0008) +#define MPI26_NVME_FLAGS_IOCPLBNTA_RSP_ADDR (0x000C) +/* Data Direction*/ +#define MPI26_NVME_FLAGS_DATADIRECTION_MASK (0x0003) +#define MPI26_NVME_FLAGS_NODATATRANSFER (0x0000) +#define MPI26_NVME_FLAGS_WRITE (0x0001) +#define MPI26_NVME_FLAGS_READ (0x0002) +#define MPI26_NVME_FLAGS_BIDIRECTIONAL (0x0003) + + +/* NVMe 
Encapsulated Reply Message */ +typedef struct _MPI26_NVME_ENCAPSULATED_ERROR_REPLY +{ + U16 DevHandle; /* 0x00 */ + U8 MsgLength; /* 0x02 */ + U8 Function; /* 0x03 */ + U16 EncapsulatedCommandLength; /* 0x04 */ + U8 Reserved1; /* 0x06 */ + U8 MsgFlags; /* 0x07 */ + U8 VP_ID; /* 0x08 */ + U8 VF_ID; /* 0x09 */ + U16 Reserved2; /* 0x0A */ + U16 Reserved3; /* 0x0C */ + U16 IOCStatus; /* 0x0E */ + U32 IOCLogInfo; /* 0x10 */ + U16 ErrorResponseCount; /* 0x14 */ + U16 Reserved4; /* 0x16 */ +} MPI26_NVME_ENCAPSULATED_ERROR_REPLY, + MPI2_POINTER PTR_MPI26_NVME_ENCAPSULATED_ERROR_REPLY, + Mpi26NVMeEncapsulatedErrorReply_t, + MPI2_POINTER pMpi26NVMeEncapsulatedErrorReply_t; + + +#endif + + diff --git a/usr/src/uts/common/sys/scsi/generic/inquiry.h b/usr/src/uts/common/sys/scsi/generic/inquiry.h index ddfd683169..fcbf00d5dc 100644 --- a/usr/src/uts/common/sys/scsi/generic/inquiry.h +++ b/usr/src/uts/common/sys/scsi/generic/inquiry.h @@ -362,7 +362,8 @@ struct scsi_inquiry { #define DTYPE_NOTPRESENT (DPQ_NEVER | DTYPE_UNKNOWN) /* - * Defined Response Data Formats: + * Defined Versions for inquiry data. These represent the base version that a + * device supports.
*/ #define RDF_LEVEL0 0x00 /* no conformance claim (SCSI-1) */ #define RDF_CCS 0x01 /* Obsolete (pseudo-spec) */ @@ -370,7 +371,8 @@ struct scsi_inquiry { #define RDF_SCSI_SPC 0x03 /* ANSI INCITS 301-1997 (SPC) */ #define RDF_SCSI_SPC2 0x04 /* ANSI INCITS 351-2001 (SPC-2) */ #define RDF_SCSI_SPC3 0x05 /* ANSI INCITS 408-2005 (SPC-3) */ -#define RDF_SCSI_SPC4 0x06 /* t10 (SPC-4) */ +#define RDF_SCSI_SPC4 0x06 /* ANSI INCITS 513-2015 (SPC-4) */ +#define RDF_SCSI_SPC5 0x07 /* t10 (SPC-5) */ /* * Defined Target Port Group Select values: @@ -436,6 +438,7 @@ struct vpd_desc { #define PM_CAPABLE_SPC2 RDF_SCSI_SPC2 #define PM_CAPABLE_SPC3 RDF_SCSI_SPC3 #define PM_CAPABLE_SPC4 RDF_SCSI_SPC4 +#define PM_CAPABLE_SPC5 RDF_SCSI_SPC5 #define PM_CAPABLE_LOG_MASK 0xffff0000 /* use upper 16 bit to */ /* indicate log specifics */ #define PM_CAPABLE_LOG_SUPPORTED 0x10000 /* Log page 0xE might be */ diff --git a/usr/src/uts/common/sys/scsi/targets/sddef.h b/usr/src/uts/common/sys/scsi/targets/sddef.h index d28918d9c5..bb522141af 100644 --- a/usr/src/uts/common/sys/scsi/targets/sddef.h +++ b/usr/src/uts/common/sys/scsi/targets/sddef.h @@ -763,6 +763,12 @@ _NOTE(MUTEX_PROTECTS_DATA(sd_lun::un_fi_mutex, #define SD_FM_LOG(un) (((struct sd_fm_internal *)\ ((un)->un_fm_private))->fm_log_level) +/* + * Version Related Macros + */ +#define SD_SCSI_VERS_IS_GE_SPC_4(un) \ + (SD_INQUIRY(un)->inq_ansi == RDF_SCSI_SPC4 || \ + SD_INQUIRY(un)->inq_ansi == RDF_SCSI_SPC5) /* * Values for un_ctype @@ -1821,6 +1827,10 @@ struct sd_fm_internal { #define SD_PM_CAPABLE_IS_SPC_4(pm_cap) \ ((pm_cap & PM_CAPABLE_PM_MASK) == PM_CAPABLE_SPC4) +#define SD_PM_CAPABLE_IS_GE_SPC_4(pm_cap) \ + (((pm_cap & PM_CAPABLE_PM_MASK) == PM_CAPABLE_SPC4) || \ + ((pm_cap & PM_CAPABLE_PM_MASK) == PM_CAPABLE_SPC5)) + #define SD_PM_CAP_LOG_SUPPORTED(pm_cap) \ ((pm_cap & PM_CAPABLE_LOG_SUPPORTED) ? 
TRUE : FALSE) diff --git a/usr/src/uts/common/sys/shm.h b/usr/src/uts/common/sys/shm.h index 0219fc2cf7..8f530afda2 100644 --- a/usr/src/uts/common/sys/shm.h +++ b/usr/src/uts/common/sys/shm.h @@ -21,6 +21,7 @@ */ /* * Copyright 2014 Garrett D'Amore <garrett@damore.org> + * Copyright 2016 Joyent, Inc. * * Copyright 2003 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. @@ -120,6 +121,10 @@ struct shmid_ds { #define SHM_LOCK 3 /* Lock segment in core */ #define SHM_UNLOCK 4 /* Unlock segment */ +#if defined(_KERNEL) +#define SHM_RMID 5 /* Private RMID for lx support */ +#endif + #if !defined(_KERNEL) int shmget(key_t, size_t, int); int shmids(int *, uint_t, uint_t *); diff --git a/usr/src/uts/common/sys/shm_impl.h b/usr/src/uts/common/sys/shm_impl.h index 4d8cdcede5..1eae2ca0a4 100644 --- a/usr/src/uts/common/sys/shm_impl.h +++ b/usr/src/uts/common/sys/shm_impl.h @@ -21,13 +21,12 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2016 Joyent, Inc. */ #ifndef _SYS_SHM_IMPL_H #define _SYS_SHM_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/ipc_impl.h> #if defined(_KERNEL) || defined(_KMEMUSER) #include <sys/shm.h> @@ -70,7 +69,11 @@ typedef struct kshmid { time_t shm_ctime; /* last change time */ struct sptinfo *shm_sptinfo; /* info about ISM segment */ struct seg *shm_sptseg; /* pointer to ISM segment */ - long shm_sptprot; /* was reserved (still a "long") */ + ulong_t shm_opts; + /* + * Composed of: sptprot (uchar_t) and + * RM_PENDING flag (1 bit). + */ } kshmid_t; /* @@ -78,6 +81,14 @@ typedef struct kshmid { */ #define SHMSA_ISM 1 /* uses shared page table */ +/* + * shm_opts definitions + * Low byte in shm_opts is used for sptprot (see PROT_ALL). The upper bits are + * used for additional options. + */ +#define SHM_PROT_MASK 0xff +#define SHM_RM_PENDING 0x100 + typedef struct sptinfo { struct as *sptas; /* dummy as ptr. 
for spt segment */ } sptinfo_t; diff --git a/usr/src/uts/common/sys/signal.h b/usr/src/uts/common/sys/signal.h index aece147bec..b12dff6034 100644 --- a/usr/src/uts/common/sys/signal.h +++ b/usr/src/uts/common/sys/signal.h @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -158,8 +159,8 @@ struct sigaction32 { * use of these symbols by applications is injurious * to binary compatibility */ -#define NSIG 74 /* valid signals range from 1 to NSIG-1 */ -#define MAXSIG 73 /* size of u_signal[], NSIG-1 <= MAXSIG */ +#define NSIG 75 /* valid signals range from 1 to NSIG-1 */ +#define MAXSIG 74 /* size of u_signal[], NSIG-1 <= MAXSIG */ #endif /* defined(__EXTENSIONS__) || !defined(_XPG4_2) */ #define MINSIGSTKSZ 2048 diff --git a/usr/src/uts/common/sys/socket.h b/usr/src/uts/common/sys/socket.h index 9e61bc7bb0..25880522e9 100644 --- a/usr/src/uts/common/sys/socket.h +++ b/usr/src/uts/common/sys/socket.h @@ -40,6 +40,9 @@ /* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */ +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ #ifndef _SYS_SOCKET_H #define _SYS_SOCKET_H @@ -205,6 +208,7 @@ struct so_snd_bufinfo { #define SO_SRCADDR 0x2001 /* Internal: AF_UNIX source address */ #define SO_FILEP 0x2002 /* Internal: AF_UNIX file pointer */ #define SO_UNIX_CLOSE 0x2003 /* Internal: AF_UNIX peer closed */ +#define SO_REUSEPORT 0x2004 /* allow simultaneous port reuse */ #endif /* _KERNEL */ /* @@ -304,8 +308,9 @@ struct linger { #define AF_INET_OFFLOAD 30 /* Sun private; do not use */ #define AF_TRILL 31 /* TRILL interface */ #define AF_PACKET 32 /* PF_PACKET Linux socket interface */ +#define AF_LX_NETLINK 33 /* Linux-compatible netlink */ -#define AF_MAX 32 +#define AF_MAX 33 /* * Protocol families, same as address families for now. 
@@ -345,6 +350,7 @@ struct linger { #define PF_INET_OFFLOAD AF_INET_OFFLOAD /* Sun private; do not use */ #define PF_TRILL AF_TRILL #define PF_PACKET AF_PACKET +#define PF_LX_NETLINK AF_LX_NETLINK #define PF_MAX AF_MAX diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h index 479641a11b..1e48b00dd7 100644 --- a/usr/src/uts/common/sys/socketvar.h +++ b/usr/src/uts/common/sys/socketvar.h @@ -304,15 +304,16 @@ struct sonode { #define SS_OOBPEND 0x00002000 /* OOB pending or present - poll */ #define SS_HAVEOOBDATA 0x00004000 /* OOB data present */ #define SS_HADOOBDATA 0x00008000 /* OOB data consumed */ -#define SS_CLOSING 0x00010000 /* in process of closing */ +#define SS_CLOSING 0x00010000 /* in process of closing */ #define SS_FIL_DEFER 0x00020000 /* filter deferred notification */ #define SS_FILOP_OK 0x00040000 /* socket can attach filters */ #define SS_FIL_RCV_FLOWCTRL 0x00080000 /* filter asserted rcv flow ctrl */ + #define SS_FIL_SND_FLOWCTRL 0x00100000 /* filter asserted snd flow ctrl */ #define SS_FIL_STOP 0x00200000 /* no more filter actions */ - #define SS_SODIRECT 0x00400000 /* transport supports sodirect */ +#define SS_FILOP_UNSF 0x00800000 /* block attaching unsafe filters */ #define SS_SENTLASTREADSIG 0x01000000 /* last rx signal has been sent */ #define SS_SENTLASTWRITESIG 0x02000000 /* last tx signal has been sent */ @@ -328,7 +329,8 @@ struct sonode { /* * Sockets that can fall back to TPI must ensure that fall back is not - * initiated while a thread is using a socket. + * initiated while a thread is using a socket. Otherwise this disables all + * future filter attachment. */ #define SO_BLOCK_FALLBACK(so, fn) \ ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \ @@ -344,6 +346,24 @@ struct sonode { } \ } +/* + * Sockets that can fall back to TPI must ensure that fall back is not + * initiated while a thread is using a socket. Otherwise this disables all + * future unsafe filter attachment. 
Safe filters can still attach after + * we execute the function in which this macro is used. + */ +#define SO_BLOCK_FALLBACK_SAFE(so, fn) \ + ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \ + rw_enter(&(so)->so_fallback_rwlock, RW_READER); \ + if ((so)->so_state & SS_FALLBACK_COMP) { \ + rw_exit(&(so)->so_fallback_rwlock); \ + return (fn); \ + } else if (((so)->so_state & SS_FILOP_UNSF) == 0) { \ + mutex_enter(&(so)->so_lock); \ + (so)->so_state |= SS_FILOP_UNSF; \ + mutex_exit(&(so)->so_lock); \ + } + #define SO_UNBLOCK_FALLBACK(so) { \ rw_exit(&(so)->so_fallback_rwlock); \ } @@ -375,6 +395,7 @@ struct sonode { /* The modes below are only for non-streams sockets */ #define SM_ACCEPTSUPP 0x400 /* can handle accept() */ #define SM_SENDFILESUPP 0x800 /* Private: proto supp sendfile */ +#define SM_DEFERERR 0x1000 /* Private: defer so_error delivery */ /* * Socket versions. Used by the socket library when calling _so_socket(). diff --git a/usr/src/uts/common/sys/sockfilter.h b/usr/src/uts/common/sys/sockfilter.h index 9f6d8b499b..c4dd6539de 100644 --- a/usr/src/uts/common/sys/sockfilter.h +++ b/usr/src/uts/common/sys/sockfilter.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_SOCKFILTER_H @@ -129,6 +130,15 @@ typedef struct sof_ops { #define SOF_VERSION 1 +/* + * Flag indicating that the filter module is safe to attach after bind, + * getsockname, getsockopt or setsockopt calls. By default filters are unsafe + * so may not be attached after any socket operation. However, a safe filter + * can still be attached after one of the above calls. This makes attaching + * the filter less dependent on the initial socket setup order. 
+ */ +#define SOF_ATT_SAFE 0x1 + extern int sof_register(int, const char *, const sof_ops_t *, int); extern int sof_unregister(const char *); diff --git a/usr/src/uts/common/sys/squeue.h b/usr/src/uts/common/sys/squeue.h index 040963eef7..89b355970e 100644 --- a/usr/src/uts/common/sys/squeue.h +++ b/usr/src/uts/common/sys/squeue.h @@ -30,6 +30,17 @@ extern "C" { #endif +/* + * Originally in illumos, we had an IP-centric view of the serialization queue + * abstraction. While that has useful properties, the implementation of squeues + * hardcodes various parts of the implementation of IP into it which makes it + * unsuitable for other consumers. To enable them, we created another interface, + * but opted not to port all of the functionality that IP uses in the form of + * ip_squeue.c. As other consumers need the functionality that IP has in squeues, + * we'll come up with more genericized methods and add that functionality + * to <sys/gsqueue.h>. Please do not continue to use this header.
+ */ + #include <sys/types.h> #include <sys/processor.h> #include <sys/stream.h> @@ -77,12 +88,13 @@ typedef enum { struct ip_recv_attr_s; extern void squeue_init(void); -extern squeue_t *squeue_create(pri_t); +extern squeue_t *squeue_create(pri_t, boolean_t); extern void squeue_bind(squeue_t *, processorid_t); extern void squeue_unbind(squeue_t *); extern void squeue_enter(squeue_t *, mblk_t *, mblk_t *, uint32_t, struct ip_recv_attr_s *, int, uint8_t); extern uintptr_t *squeue_getprivate(squeue_t *, sqprivate_t); +extern void squeue_destroy(squeue_t *); struct conn_s; extern int squeue_synch_enter(struct conn_s *, mblk_t *); diff --git a/usr/src/uts/common/sys/squeue_impl.h b/usr/src/uts/common/sys/squeue_impl.h index 8eb6a30add..2bb717fb52 100644 --- a/usr/src/uts/common/sys/squeue_impl.h +++ b/usr/src/uts/common/sys/squeue_impl.h @@ -114,6 +114,7 @@ struct squeue_s { squeue_set_t *sq_set; /* managed by squeue creator */ pri_t sq_priority; /* squeue thread priority */ + boolean_t sq_isip; /* use IP-centric features */ /* Keep the debug-only fields at the end of the structure */ #ifdef DEBUG @@ -161,6 +162,7 @@ struct squeue_s { #define SQS_POLL_RESTART_DONE 0x01000000 #define SQS_POLL_THR_QUIESCE 0x02000000 #define SQS_PAUSE 0x04000000 /* The squeue has been paused */ +#define SQS_EXIT 0x08000000 /* squeue is being torn down */ #define SQS_WORKER_THR_CONTROL \ (SQS_POLL_QUIESCE | SQS_POLL_RESTART | SQS_POLL_CLEANUP) diff --git a/usr/src/uts/common/sys/stream.h b/usr/src/uts/common/sys/stream.h index ea2c3d8e9a..7d118b09e8 100644 --- a/usr/src/uts/common/sys/stream.h +++ b/usr/src/uts/common/sys/stream.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. All rights reserved. * Copyright 2018 OmniOS Community Edition (OmniOSce) Association. * Copyright 2015 Joyent, Inc. All rights reserved. 
* Copyright 2022 Garrett D'Amore diff --git a/usr/src/uts/common/sys/sunddi.h b/usr/src/uts/common/sys/sunddi.h index 442595289f..c0dedf555c 100644 --- a/usr/src/uts/common/sys/sunddi.h +++ b/usr/src/uts/common/sys/sunddi.h @@ -1599,8 +1599,14 @@ int ddi_ffs(long mask); int +ddi_ffsll(long long mask); + +int ddi_fls(long mask); +int +ddi_flsll(long long mask); + /* * The ddi_soft_state* routines comprise generic storage management utilities * for driver soft state structures. Two types of soft_state indexes are diff --git a/usr/src/uts/common/sys/systrace.h b/usr/src/uts/common/sys/systrace.h index d43974451e..17e509d4d8 100644 --- a/usr/src/uts/common/sys/systrace.h +++ b/usr/src/uts/common/sys/systrace.h @@ -22,13 +22,12 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2014 Joyent, Inc. All rights reserved. */ #ifndef _SYS_SYSTRACE_H #define _SYS_SYSTRACE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/dtrace.h> #ifdef __cplusplus @@ -47,16 +46,18 @@ extern systrace_sysent_t *systrace_sysent; extern systrace_sysent_t *systrace_sysent32; extern void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t, uintptr_t); + uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern void systrace_stub(dtrace_id_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t, uintptr_t); + uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern int64_t dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, - uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5); + uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, + uintptr_t arg6, uintptr_t arg7); #ifdef _SYSCALL32_IMPL extern int64_t dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1, - uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5); + uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, + uintptr_t arg6, uintptr_t arg7); #endif 
#endif diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h index 39106a14fc..4edeb7a41c 100644 --- a/usr/src/uts/common/sys/termios.h +++ b/usr/src/uts/common/sys/termios.h @@ -363,6 +363,24 @@ extern pid_t tcgetsid(int); #define TCSETSF (_TIOC|16) /* + * linux terminal ioctls we need to be aware of + */ +#define TIOCSETLD (_TIOC|123) /* set line discipline parms */ +#define TIOCGETLD (_TIOC|124) /* get line discipline parms */ + +/* + * The VMIN and VTIME on Solaris overlap with VEOF and VEOL. This is + * perfectly legal, except that Linux expects them to be separate, so we keep + * them separately. + */ +struct lx_cc { + unsigned char veof; /* veof value */ + unsigned char veol; /* veol value */ + unsigned char vmin; /* vmin value */ + unsigned char vtime; /* vtime value */ +}; + +/* * NTP PPS ioctls */ #define TIOCGPPS (_TIOC|125) diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h index 53a31c848c..76e6835349 100644 --- a/usr/src/uts/common/sys/thread.h +++ b/usr/src/uts/common/sys/thread.h @@ -375,7 +375,7 @@ typedef struct _kthread { #define T_WOULDBLOCK 0x0020 /* for lockfs */ #define T_DONTBLOCK 0x0040 /* for lockfs */ #define T_DONTPEND 0x0080 /* for lockfs */ -#define T_SYS_PROF 0x0100 /* profiling on for duration of system call */ +#define T_SPLITSTK 0x0100 /* kernel stack is currently split */ #define T_WAITCVSEM 0x0200 /* waiting for a lwp_cv or lwp_sema on sleepq */ #define T_WATCHPT 0x0400 /* thread undergoing a watchpoint emulation */ #define T_PANIC 0x0800 /* thread initiated a system panic */ @@ -427,8 +427,9 @@ typedef struct _kthread { #define TS_RESUME 0x1000 /* setrun() by CPR resume process */ #define TS_CREATE 0x2000 /* setrun() by syslwp_create() */ #define TS_RUNQMATCH 0x4000 /* exact run queue balancing by setbackdq() */ +#define TS_BSTART 0x8000 /* setrun() by brand */ #define TS_ALLSTART \ - (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE) +
(TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE|TS_BSTART) #define TS_ANYWAITQ (TS_PROJWAITQ|TS_ZONEWAITQ) /* @@ -456,6 +457,10 @@ typedef struct _kthread { #define ISTOPPED(t) ((t)->t_state == TS_STOPPED && \ !((t)->t_schedflag & TS_PSTART)) +/* True if thread is stopped for a brand-specific reason */ +#define BSTOPPED(t) ((t)->t_state == TS_STOPPED && \ + !((t)->t_schedflag & TS_BSTART)) + /* True if thread is asleep and wakeable */ #define ISWAKEABLE(t) (((t)->t_state == TS_SLEEP && \ ((t)->t_flag & T_WAKEABLE))) diff --git a/usr/src/uts/common/sys/time.h b/usr/src/uts/common/sys/time.h index 634d5fb3a6..d82508e6b3 100644 --- a/usr/src/uts/common/sys/time.h +++ b/usr/src/uts/common/sys/time.h @@ -15,6 +15,7 @@ * Use is subject to license terms. * * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright 2016 Joyent, Inc. * * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. */ @@ -266,6 +267,14 @@ typedef longlong_t hrtime_t; #if defined(_KERNEL) || defined(_FAKE_KERNEL) +/* + * Unsigned counterpart to hrtime_t + */ +typedef u_longlong_t uhrtime_t; + +#define HRTIME_MAX LLONG_MAX +#define UHRTIME_MAX ULLONG_MAX + #include <sys/time_impl.h> #include <sys/mutex.h> diff --git a/usr/src/uts/common/sys/timer.h b/usr/src/uts/common/sys/timer.h index 4bbc5b4fb8..db27960413 100644 --- a/usr/src/uts/common/sys/timer.h +++ b/usr/src/uts/common/sys/timer.h @@ -35,6 +35,8 @@ #include <sys/proc.h> #include <sys/thread.h> #include <sys/param.h> +#include <sys/siginfo.h> +#include <sys/port.h> #ifdef __cplusplus extern "C" { @@ -65,6 +67,7 @@ extern int timer_max; */ #define IT_SIGNAL 0x01 #define IT_PORT 0x02 /* use event port notification */ +#define IT_CALLBACK 0x04 /* custom callback function */ struct clock_backend; @@ -92,14 +95,27 @@ struct itimer { struct clock_backend *it_backend; void (*it_fire)(itimer_t *); kmutex_t it_mutex; - void *it_portev; /* port_kevent_t pointer */ - void *it_portsrc; /* port_source_t pointer */ - 
int it_portfd; /* port file descriptor */ + union { + struct { + void *_it_portev; /* port_kevent_t pointer */ + void *_it_portsrc; /* port_source_t pointer */ + int _it_portfd; /* port file descriptor */ + } _it_ev_port; + struct { + void (*_it_cb_func)(itimer_t *); + uintptr_t _it_cb_data[2]; + } _it_ev_cb; + } _it_ev_data; }; #define it_sigq __data.__proc.__it_sigq #define it_lwp __data.__proc.__it_lwp #define it_frontend __data.__it_frontend +#define it_portev _it_ev_data._it_ev_port._it_portev +#define it_portsrc _it_ev_data._it_ev_port._it_portsrc +#define it_portfd _it_ev_data._it_ev_port._it_portfd +#define it_cb_func _it_ev_data._it_ev_cb._it_cb_func +#define it_cb_data _it_ev_data._it_ev_cb._it_cb_data typedef struct clock_backend { struct sigevent clk_default; @@ -116,7 +132,11 @@ typedef struct clock_backend { extern void clock_add_backend(clockid_t clock, clock_backend_t *backend); extern clock_backend_t *clock_get_backend(clockid_t clock); +extern void timer_release(struct proc *, itimer_t *); +extern void timer_delete_grabbed(struct proc *, timer_t tid, itimer_t *it); extern void timer_lwpbind(); +extern int timer_setup(clock_backend_t *, struct sigevent *, port_notify_t *, + itimer_t **, timer_t *); extern void timer_func(sigqueue_t *); extern void timer_exit(void); diff --git a/usr/src/uts/common/sys/ts.h b/usr/src/uts/common/sys/ts.h index 7949058565..2cf5dcade3 100644 --- a/usr/src/uts/common/sys/ts.h +++ b/usr/src/uts/common/sys/ts.h @@ -79,6 +79,8 @@ typedef struct tsproc { } tsproc_t; /* flags */ + +/* Formerly: TSKPRI 0x01 - thread at kernel mode priority */ #define TSBACKQ 0x02 /* thread goes to back of dispq if preempted */ #define TSIA 0x04 /* thread is interactive */ #define TSIASET 0x08 /* interactive thread is "on" */ diff --git a/usr/src/uts/common/sys/uadmin.h b/usr/src/uts/common/sys/uadmin.h index 904b52cac4..75d000b831 100644 --- a/usr/src/uts/common/sys/uadmin.h +++ b/usr/src/uts/common/sys/uadmin.h @@ -23,6 +23,7 @@ * * Copyright 
2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -159,7 +160,7 @@ extern kmutex_t ualock; extern void mdboot(int, int, char *, boolean_t); extern void mdpreboot(int, int, char *); extern int kadmin(int, int, void *, cred_t *); -extern void killall(zoneid_t); +extern void killall(zoneid_t, boolean_t); #endif extern int uadmin(int, int, uintptr_t); diff --git a/usr/src/uts/common/sys/uio.h b/usr/src/uts/common/sys/uio.h index bca1ed1fa3..9584be559f 100644 --- a/usr/src/uts/common/sys/uio.h +++ b/usr/src/uts/common/sys/uio.h @@ -145,7 +145,8 @@ typedef struct uioa_s { */ typedef enum xuio_type { UIOTYPE_ASYNCIO, - UIOTYPE_ZEROCOPY + UIOTYPE_ZEROCOPY, + UIOTYPE_PEEKSIZE } xuio_type_t; typedef struct xuio { @@ -175,6 +176,15 @@ typedef struct xuio { int xu_zc_rw; /* read or write buffer */ void *xu_zc_priv; /* fs specific */ } xu_zc; + + /* + * Peek Size Support -- facilitate peeking at the size of a + * waiting message on a socket. + */ + struct { + ssize_t xu_ps_size; /* size of waiting msg */ + boolean_t xu_ps_set; /* was size calculated? */ + } xu_ps; } xu_ext; } xuio_t; diff --git a/usr/src/uts/common/sys/user.h b/usr/src/uts/common/sys/user.h index 7f54dcf3ab..90fde4ef98 100644 --- a/usr/src/uts/common/sys/user.h +++ b/usr/src/uts/common/sys/user.h @@ -26,7 +26,7 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ /* - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2019 Joyent, Inc. * Copyright 2022 Oxide Computer Company */ @@ -203,9 +203,9 @@ typedef struct { /* kernel syscall set type */ * This value should not be changed in a patch. 
*/ #if defined(__sparc) -#define __KERN_NAUXV_IMPL 20 +#define __KERN_NAUXV_IMPL 24 #elif defined(__i386) || defined(__amd64) -#define __KERN_NAUXV_IMPL 26 +#define __KERN_NAUXV_IMPL 29 #endif struct execsw; @@ -228,7 +228,11 @@ typedef struct user { char u_psargs[PSARGSZ]; /* arguments from exec */ int u_argc; /* value of argc passed to main() */ uintptr_t u_argv; /* value of argv passed to main() */ + uintptr_t u_argvstrs; /* argv string space pointer */ + size_t u_argvstrsize; /* size of argv string space */ uintptr_t u_envp; /* value of envp passed to main() */ + uintptr_t u_envstrs; /* env string space pointer */ + size_t u_envstrsize; /* size of env string space */ uintptr_t u_commpagep; /* address of mapped comm page */ /* diff --git a/usr/src/uts/common/sys/vm.h b/usr/src/uts/common/sys/vm.h index 14b5754b28..b32a789d36 100644 --- a/usr/src/uts/common/sys/vm.h +++ b/usr/src/uts/common/sys/vm.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2017 Joyent, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -57,6 +58,8 @@ int queue_io_request(struct vnode *, u_offset_t); extern kmutex_t memavail_lock; extern kcondvar_t memavail_cv; +#define WAKE_PAGEOUT_SCANNER() cv_broadcast(&proc_pageout->p_cv) + #endif /* defined(_KERNEL) */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/vm_usage.h b/usr/src/uts/common/sys/vm_usage.h index 1aa4a8ee6d..afbf438eff 100644 --- a/usr/src/uts/common/sys/vm_usage.h +++ b/usr/src/uts/common/sys/vm_usage.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2017 Joyent, Inc. All rights reserved. 
*/ #ifndef _SYS_VM_USAGE_H @@ -79,8 +80,12 @@ extern "C" { /* zoneid */ #define VMUSAGE_COL_EUSERS 0x2000 /* same as VMUSAGE_COL_RUSERS, but by */ /* euser */ +#define VMUSAGE_A_ZONE 0x4000 /* rss/swap for a specified zone */ -#define VMUSAGE_MASK 0x3fff /* all valid flags for getvmusage() */ +#define VMUSAGE_MASK 0x7fff /* all valid flags for getvmusage() */ + +#define VMUSAGE_ZONE_FLAGS (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | \ + VMUSAGE_A_ZONE) typedef struct vmusage { id_t vmu_zoneid; /* zoneid, or ALL_ZONES for */ diff --git a/usr/src/uts/common/sys/vmsystm.h b/usr/src/uts/common/sys/vmsystm.h index e8e30b7608..daf76f9f51 100644 --- a/usr/src/uts/common/sys/vmsystm.h +++ b/usr/src/uts/common/sys/vmsystm.h @@ -19,6 +19,9 @@ * CDDL HEADER END */ /* + * Copyright (c) 2017, Joyent, Inc. All rights reserved. + */ +/* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -58,6 +61,9 @@ extern pgcnt_t desscan; /* desired pages scanned per second */ extern pgcnt_t slowscan; extern pgcnt_t fastscan; extern pgcnt_t pushes; /* number of pages pushed to swap device */ +extern uint64_t low_mem_scan; /* num times page scan due to low memory */ +extern uint64_t zone_cap_scan; /* num times page scan due to zone cap */ +extern uint64_t n_throttle; /* num times page create throttled */ /* writable copies of tunables */ extern pgcnt_t maxpgio; /* max paging i/o per sec before start swaps */ @@ -160,6 +166,8 @@ extern void *boot_virt_alloc(void *addr, size_t size); extern size_t exec_get_spslew(void); +extern caddr_t map_userlimit(proc_t *pp, struct as *as, int flags); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/vnd.h b/usr/src/uts/common/sys/vnd.h new file mode 100644 index 0000000000..bc7c9c3122 --- /dev/null +++ b/usr/src/uts/common/sys/vnd.h @@ -0,0 +1,141 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. 
+ * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_VND_H +#define _SYS_VND_H + +#include <sys/types.h> +#include <sys/vnd_errno.h> +#include <sys/frameio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * We distinguish between normal ioctls and private ioctls we issue to our + * streams version. Streams ioctls have the upper bit set in the lowest byte. + * Note that there are no STREAMs ioctls for userland and all definitions + * related to them are not present in this file. + */ +#define VND_IOC (('v' << 24) | ('n' << 16) | ('d' << 8)) + +/* + * Attach the current minor instance to a given dlpi datalink identified by a + * vnd_ioc_attach_t argument. This fails if it's already been attached. Note that + * unlike the other ioctls, this is passed directly as opposed to every other + * function which is passed as a pointer to the value. + */ +#define VND_IOC_ATTACH (VND_IOC | 0x1) + +#define VND_NAMELEN 32 + +typedef struct vnd_ioc_attach { + char via_name[VND_NAMELEN]; + zoneid_t via_zoneid; + uint32_t via_errno; +} vnd_ioc_attach_t; + +/* + * Link the current minor instance into the /devices name space. + * + * This ioctl adds entries into /devices with a name of the form z%d:%s vil_zid, + * vil_name. The device will be namespaced to the zone. The global zone will be + * able to see all minor nodes. In the zone, only the /dev entries will exist. + * At this time, a given device can only have one link at a time. Note that a + * user cannot specify the zone to pass in, rather it is the zone that the + * device was attached in.
+ */ +#define VND_IOC_LINK (VND_IOC | 0x2) + +typedef struct vnd_ioc_link { + char vil_name[VND_NAMELEN]; + uint32_t vil_errno; +} vnd_ioc_link_t; + +/* + * Unlink the opened minor instance from the /devices name space. A zone may use + * this to unlink an extant entry in /dev; however, they will not be able to + * link it in again. + */ +#define VND_IOC_UNLINK (VND_IOC | 0x3) +typedef struct vnd_ioc_unlink { + uint32_t viu_errno; +} vnd_ioc_unlink_t; + +/* + * Controls to get and set the current receive buffer size. + */ +typedef struct vnd_ioc_buf { + uint64_t vib_size; + uint32_t vib_filler; + uint32_t vib_errno; +} vnd_ioc_buf_t; + +#define VND_IOC_GETRXBUF (VND_IOC | 0x04) +#define VND_IOC_SETRXBUF (VND_IOC | 0x05) +#define VND_IOC_GETMAXBUF (VND_IOC | 0x06) +#define VND_IOC_GETTXBUF (VND_IOC | 0x07) +#define VND_IOC_SETTXBUF (VND_IOC | 0x08) +#define VND_IOC_GETMINTU (VND_IOC | 0x09) +#define VND_IOC_GETMAXTU (VND_IOC | 0x0a) + +/* + * Information and listing ioctls + * + * This gets information about all of the active vnd instances. vl_actents is + * always updated to the number around and vl_nents is the number of + * vnd_ioc_info_t elements allocated in vl_ents. + */ +typedef struct vnd_ioc_info { + uint32_t vii_version; + zoneid_t vii_zone; + char vii_name[VND_NAMELEN]; + char vii_datalink[VND_NAMELEN]; +} vnd_ioc_info_t; + +typedef struct vnd_ioc_list { + uint_t vl_nents; + uint_t vl_actents; + vnd_ioc_info_t *vl_ents; +} vnd_ioc_list_t; + +#ifdef _KERNEL + +typedef struct vnd_ioc_list32 { + uint_t vl_nents; + uint_t vl_actents; + caddr32_t vl_ents; +} vnd_ioc_list32_t; + +#endif /* _KERNEL */ + +#define VND_IOC_LIST (VND_IOC | 0x20) + +/* + * Framed I/O ioctls + * + * Users should use the standard frameio_t as opposed to a vnd specific type. + * This is a consolidation private ioctl pending further stability in the form of + * specific system work.
+ */ +#define VND_IOC_FRAMEIO_READ (VND_IOC | 0x30) +#define VND_IOC_FRAMEIO_WRITE (VND_IOC | 0x31) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VND_H */ diff --git a/usr/src/uts/common/sys/vnd_errno.h b/usr/src/uts/common/sys/vnd_errno.h new file mode 100644 index 0000000000..89e5fc2543 --- /dev/null +++ b/usr/src/uts/common/sys/vnd_errno.h @@ -0,0 +1,72 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_VND_ERRNO_H +#define _SYS_VND_ERRNO_H + +/* + * This header contains all of the available vnd errors. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum vnd_errno { + VND_E_SUCCESS = 0, /* no error */ + VND_E_NOMEM, /* no memory */ + VND_E_NODATALINK, /* no such datalink */ + VND_E_NOTETHER, /* not DL_ETHER */ + VND_E_DLPIINVAL, /* Unknown DLPI failures */ + VND_E_ATTACHFAIL, /* DL_ATTACH_REQ failed */ + VND_E_BINDFAIL, /* DL_BIND_REQ failed */ + VND_E_PROMISCFAIL, /* DL_PROMISCON_REQ failed */ + VND_E_DIRECTFAIL, /* DLD_CAPAB_DIRECT enable failed */ + VND_E_CAPACKINVAL, /* bad dl_capability_ack_t */ + VND_E_SUBCAPINVAL, /* bad dl_capability_sub_t */ + VND_E_DLDBADVERS, /* bad dld version */ + VND_E_KSTATCREATE, /* failed to create kstats */ + VND_E_NODEV, /* no such vnd link */ + VND_E_NONETSTACK, /* netstack doesn't exist */ + VND_E_ASSOCIATED, /* device already associated */ + VND_E_ATTACHED, /* device already attached */ + VND_E_LINKED, /* device already linked */ + VND_E_BADNAME, /* invalid name */ + VND_E_PERM, /* can't touch this */ + VND_E_NOZONE, /* no such zone */ + VND_E_STRINIT, /* 
failed to initialize vnd stream module */ + VND_E_NOTATTACHED, /* device not attached */ + VND_E_NOTLINKED, /* device not linked */ + VND_E_LINKEXISTS, /* another device has the same link name */ + VND_E_MINORNODE, /* failed to create minor node */ + VND_E_BUFTOOBIG, /* requested buffer size is too large */ + VND_E_BUFTOOSMALL, /* requested buffer size is too small */ + VND_E_DLEXCL, /* unable to get dlpi excl access */ + VND_E_DIRECTNOTSUP, + /* DLD direct capability not supported over data link */ + VND_E_BADPROPSIZE, /* invalid property size */ + VND_E_BADPROP, /* invalid property */ + VND_E_PROPRDONLY, /* property is read only */ + VND_E_SYS, /* unexpected system error */ + VND_E_CAPABPASS, + /* capabilities invalid, pass-through module detected */ + VND_E_UNKNOWN /* unknown error */ +} vnd_errno_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VND_ERRNO_H */ diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h index b8702bc8f5..df5da6c2e7 100644 --- a/usr/src/uts/common/sys/vnode.h +++ b/usr/src/uts/common/sys/vnode.h @@ -21,7 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2018, Joyent, Inc. + * Copyright 2020 Joyent, Inc. * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright 2017 RackTop Systems.
*/ @@ -197,6 +197,7 @@ struct vsd_node { * v_count * v_shrlocks * v_path + * v_phantom_count * v_vsd * v_xattrdir * @@ -214,6 +215,7 @@ struct vsd_node { * v_lock * v_flag * v_count + * v_phantom_count * v_data * v_vfsp * v_stream @@ -285,6 +287,8 @@ typedef struct vnode { kmutex_t v_lock; /* protects vnode fields */ uint_t v_flag; /* vnode flags (see below) */ uint_t v_count; /* reference count */ + /* non vn_count() ref count (see below) */ + uint_t v_phantom_count; void *v_data; /* private data for fs */ struct vfs *v_vfsp; /* ptr to containing VFS */ struct stdata *v_stream; /* associated stream */ @@ -811,13 +815,15 @@ typedef enum vnevent { VE_REMOVE = 3, /* Remove of vnode's name */ VE_RMDIR = 4, /* Remove of directory vnode's name */ VE_CREATE = 5, /* Create with vnode's name which exists */ - VE_LINK = 6, /* Link with vnode's name as source */ - VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */ - VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */ + VE_LINK = 6, /* Link with vnode's name as source */ + VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */ + VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */ VE_TRUNCATE = 9, /* Truncate */ VE_PRE_RENAME_SRC = 10, /* Pre-rename, with vnode as source */ VE_PRE_RENAME_DEST = 11, /* Pre-rename, with vnode as target/dest. 
*/ - VE_PRE_RENAME_DEST_DIR = 12 /* Pre-rename with vnode as target dir */ + VE_PRE_RENAME_DEST_DIR = 12, /* Pre-rename with vnode as target dir */ + VE_RENAME_SRC_DIR = 13, /* Rename with vnode as source dir */ + VE_RESIZE = 14 /* Resize/truncate to non-zero offset */ } vnevent_t; /* @@ -1292,9 +1298,9 @@ void vn_recycle(vnode_t *); void vn_free(vnode_t *); int vn_is_readonly(vnode_t *); -int vn_is_opened(vnode_t *, v_mode_t); -int vn_is_mapped(vnode_t *, v_mode_t); -int vn_has_other_opens(vnode_t *, v_mode_t); +int vn_is_opened(vnode_t *, v_mode_t); +int vn_is_mapped(vnode_t *, v_mode_t); +int vn_has_other_opens(vnode_t *, v_mode_t); void vn_open_upgrade(vnode_t *, int); void vn_open_downgrade(vnode_t *, int); @@ -1333,10 +1339,12 @@ int vn_createat(char *pnamep, enum uio_seg seg, struct vattr *vap, int vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, ssize_t len, offset_t offset, enum uio_seg seg, int ioflag, rlim64_t ulimit, cred_t *cr, ssize_t *residp); +uint_t vn_count(struct vnode *vp); void vn_rele(struct vnode *vp); void vn_rele_async(struct vnode *vp, struct taskq *taskq); void vn_rele_dnlc(struct vnode *vp); void vn_rele_stream(struct vnode *vp); +void vn_phantom_rele(struct vnode *vp); int vn_link(char *from, char *to, enum uio_seg seg); int vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow, vnode_t *tstartvp, char *to, enum uio_seg seg); @@ -1377,7 +1385,8 @@ void vnevent_remove(vnode_t *, vnode_t *, char *, caller_context_t *); void vnevent_rmdir(vnode_t *, vnode_t *, char *, caller_context_t *); void vnevent_create(vnode_t *, caller_context_t *); void vnevent_link(vnode_t *, caller_context_t *); -void vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct); +void vnevent_rename_dest_dir(vnode_t *, vnode_t *, char *, + caller_context_t *ct); void vnevent_mountedover(vnode_t *, caller_context_t *); void vnevent_truncate(vnode_t *, caller_context_t *); int vnevent_support(vnode_t *, caller_context_t *); @@ -1387,6 +1396,7 @@ void 
vnevent_pre_rename_dest(vnode_t *, vnode_t *, char *, caller_context_t *); void vnevent_pre_rename_dest_dir(vnode_t *, vnode_t *, char *, caller_context_t *); +void vnevent_resize(vnode_t *, caller_context_t *); /* Vnode specific data */ void vsd_create(uint_t *, void (*)(void *)); @@ -1439,6 +1449,16 @@ extern uint_t pvn_vmodsort_supported; * this->vp->v_path == NULL ? "NULL" : stringof(this->vp->v_path), * this->vp->v_count) * }' + * + * There are some situations where we don't want a hold to make the vnode + * 'busy'. For example, watching a directory via port events or inotify + * should not prevent a filesystem from mounting on a watched directory. + * For those instances, a phantom hold is used via VN_PHANTOM_HOLD(). + * + * A phantom hold works identically to regular hold, except that those holds + * are excluded from the return value of vn_count(). + * + * A phantom hold must be released by VN_PHANTOM_RELE(). */ #define VN_HOLD_LOCKED(vp) { \ ASSERT(mutex_owned(&(vp)->v_lock)); \ @@ -1467,6 +1487,22 @@ extern uint_t pvn_vmodsort_supported; DTRACE_PROBE1(vn__rele, vnode_t *, vp); \ } +#define VN_PHANTOM_HOLD_LOCKED(vp) { \ + VN_HOLD_LOCKED(vp); \ + (vp)->v_phantom_count++; \ + DTRACE_PROBE1(vn__phantom_hold, vnode_t *, vp); \ +} + +#define VN_PHANTOM_HOLD(vp) { \ + mutex_enter(&(vp)->v_lock); \ + VN_PHANTOM_HOLD_LOCKED(vp); \ + mutex_exit(&(vp)->v_lock); \ +} + +#define VN_PHANTOM_RELE(vp) { \ + vn_phantom_rele(vp); \ +} + #define VN_SET_VFS_TYPE_DEV(vp, vfsp, type, dev) { \ (vp)->v_vfsp = (vfsp); \ (vp)->v_type = (type); \ @@ -1477,7 +1513,7 @@ extern uint_t pvn_vmodsort_supported; * Compare two vnodes for equality. In general this macro should be used * in preference to calling VOP_CMP directly. */ -#define VN_CMP(VP1, VP2) ((VP1) == (VP2) ? 1 : \ +#define VN_CMP(VP1, VP2) ((VP1) == (VP2) ? 1 : \ ((VP1) && (VP2) && (vn_getops(VP1) == vn_getops(VP2)) ? 
\ VOP_CMP(VP1, VP2, NULL) : 0)) diff --git a/usr/src/uts/common/sys/zfd.h b/usr/src/uts/common/sys/zfd.h new file mode 100644 index 0000000000..e08d75ecba --- /dev/null +++ b/usr/src/uts/common/sys/zfd.h @@ -0,0 +1,78 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _SYS_ZFD_H +#define _SYS_ZFD_H + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Minor node name of the global zone side (often called the "master" side) + * of the zfd dev. + */ +#define ZFD_MASTER_NAME "master" + +/* + * Minor node name of the non-global zone side (often called the "slave" + * side) of the zfd dev. + */ +#define ZFD_SLAVE_NAME "slave" + +#define ZFD_NAME_LEN 16 + +/* + * ZFD_IOC forms the base for all zfd ioctls. + */ +#define ZFD_IOC (('Z' << 24) | ('f' << 16) | ('d' << 8)) + +/* + * This ioctl tells the slave side it should push the TTY stream modules + * so that the fd looks like a tty. + */ +#define ZFD_MAKETTY (ZFD_IOC | 0) + +/* + * This ioctl puts a hangup into the stream so that the slave side sees EOF. + */ +#define ZFD_EOF (ZFD_IOC | 1) + +/* + * This ioctl succeeds if the slave side is open. + */ +#define ZFD_HAS_SLAVE (ZFD_IOC | 2) + +/* + * This ioctl links two streams into a multiplexer configuration for in-zone + * logging. + */ +#define ZFD_MUX (ZFD_IOC | 3) + +/* + * This ioctl controls the flow control setting for the log multiplexer stream + * (1 = true, 0 = false). The default is false which implies teeing into the + * log stream is "best-effort" but data will be discarded if the stream + * becomes full. 
If set and the log stream begins to fill up, the primary + * stream will stop flowing. + */ +#define ZFD_MUX_FLOWCON (ZFD_IOC | 4) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFD_H */ diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 26b74ca34a..afef75013f 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -50,8 +50,10 @@ #include <sys/socket_impl.h> #include <sys/secflags.h> #include <sys/cpu_uarray.h> +#include <sys/nvpair.h> #include <sys/list.h> #include <sys/loadavg.h> +#include <sys/vnode.h> #endif /* _KERNEL */ #ifdef __cplusplus @@ -62,15 +64,27 @@ extern "C" { * NOTE * * The contents of this file are private to the implementation of - * Solaris and are subject to change at any time without notice. + * illumos and are subject to change at any time without notice. * Applications and drivers using these interfaces may fail to * run on future releases. */ /* Available both in kernel and for user space */ -/* zone id restrictions and special ids */ -#define MAX_ZONEID 9999 +/* + * zone id restrictions and special ids. + * See 'maxzones' for run-time zone limit. + * + * The current 8k value for MAX_ZONES was originally derived from the virtual + * interface limit in IP when "shared-stack" was the only supported networking + * for zones. The virtual interface limit is the number of addresses allowed + * on an interface (see MAX_ADDRS_PER_IF). Even with exclusive stacks, an 8k + * zone limit is still a reasonable choice at this time, given other limits + * within the kernel. Since we only support 8192 zones (which includes GZ), + * there is no point in allowing MAX_ZONEID > 8k. 
+ */ +#define MAX_ZONES 8192 +#define MAX_ZONEID (MAX_ZONES - 1) #define MIN_USERZONEID 1 /* lowest user-creatable zone ID */ #define MIN_ZONEID 0 /* minimum zone ID on system */ #define GLOBAL_ZONEID 0 @@ -97,7 +111,13 @@ extern "C" { #define ZONE_CHECK_DATALINK 12 #define ZONE_LIST_DATALINK 13 -/* zone attributes */ +/* + * zone attributes + * + * Note that values up to ZONE_ATTR_HOSTID are baked into things like Solaris + * 10 which can be run under the s10 brand; don't renumber or change them. Ones + * which are no longer used are commented out. + */ #define ZONE_ATTR_ROOT 1 #define ZONE_ATTR_NAME 2 #define ZONE_ATTR_STATUS 3 @@ -109,17 +129,24 @@ extern "C" { #define ZONE_ATTR_INITNAME 9 #define ZONE_ATTR_BOOTARGS 10 #define ZONE_ATTR_BRAND 11 -#define ZONE_ATTR_PHYS_MCAP 12 +/* #define ZONE_ATTR_PHYS_MCAP 12 */ #define ZONE_ATTR_SCHED_CLASS 13 #define ZONE_ATTR_FLAGS 14 #define ZONE_ATTR_HOSTID 15 #define ZONE_ATTR_FS_ALLOWED 16 #define ZONE_ATTR_NETWORK 17 + +/* illumos extensions */ #define ZONE_ATTR_INITNORESTART 20 #define ZONE_ATTR_SECFLAGS 21 #define ZONE_ATTR_INITRESTART0 22 #define ZONE_ATTR_INITREBOOT 23 +/* OmniOS/SmartOS extensions */ +#define ZONE_ATTR_DID 30 +#define ZONE_ATTR_APP_SVC_CT 31 +#define ZONE_ATTR_SCHED_FIXEDHI 32 + /* Start of the brand-specific attribute namespace */ #define ZONE_ATTR_BRAND_ATTRS 32768 @@ -134,13 +161,18 @@ extern "C" { #define ZONE_EVENT_READY "ready" #define ZONE_EVENT_RUNNING "running" #define ZONE_EVENT_SHUTTING_DOWN "shutting_down" +#define ZONE_EVENT_FREE "free" #define ZONE_CB_NAME "zonename" #define ZONE_CB_NEWSTATE "newstate" #define ZONE_CB_OLDSTATE "oldstate" +#define ZONE_CB_RESTARTS "restarts" #define ZONE_CB_TIMESTAMP "when" #define ZONE_CB_ZONEID "zoneid" +#define ZONE_EVENT_INIT_CLASS "init" +#define ZONE_EVENT_INIT_RESTART_SC "restart" + /* * Exit values that may be returned by scripts or programs invoked by various * zone commands. 
@@ -199,6 +231,7 @@ typedef struct { uint32_t doi; /* DOI for label */ caddr32_t label; /* label associated with zone */ int flags; + zoneid_t zoneid; /* requested zoneid */ } zone_def32; #endif typedef struct { @@ -215,6 +248,7 @@ typedef struct { uint32_t doi; /* DOI for label */ const bslabel_t *label; /* label associated with zone */ int flags; + zoneid_t zoneid; /* requested zoneid */ } zone_def; /* extended error information */ @@ -239,7 +273,8 @@ typedef enum { ZONE_IS_EMPTY, ZONE_IS_DOWN, ZONE_IS_DYING, - ZONE_IS_DEAD + ZONE_IS_DEAD, + ZONE_IS_FREE /* transient state for zone sysevent */ } zone_status_t; #define ZONE_MIN_STATE ZONE_IS_UNINITIALIZED #define ZONE_MAX_STATE ZONE_IS_DEAD @@ -259,9 +294,12 @@ typedef enum zone_cmd { typedef struct zone_cmd_arg { uint64_t uniqid; /* unique "generation number" */ zone_cmd_t cmd; /* requested action */ - uint32_t _pad; /* need consistent 32/64 bit alignmt */ + int status; /* init status on shutdown */ + uint32_t debug; /* enable brand hook debug */ char locale[MAXPATHLEN]; /* locale in which to render messages */ char bootbuf[BOOTARGS_MAX]; /* arguments passed to zone_boot() */ + /* Needed for 32/64 zoneadm -> zoneadmd door arg size check. 
*/ + int pad; } zone_cmd_arg_t; /* @@ -389,7 +427,7 @@ typedef struct zone_dataset { } zone_dataset_t; /* - * structure for zone kstats + * structure for rctl zone kstats */ typedef struct zone_kstat { kstat_named_t zk_zonename; @@ -400,12 +438,57 @@ typedef struct zone_kstat { struct cpucap; typedef struct { + hrtime_t cycle_start; + uint_t cycle_cnt; + hrtime_t zone_avg_cnt; +} sys_zio_cntr_t; + +typedef struct { + kstat_named_t zv_zonename; + kstat_named_t zv_nread; + kstat_named_t zv_reads; + kstat_named_t zv_rtime; + kstat_named_t zv_rlentime; + kstat_named_t zv_rcnt; + kstat_named_t zv_nwritten; + kstat_named_t zv_writes; + kstat_named_t zv_wtime; + kstat_named_t zv_wlentime; + kstat_named_t zv_wcnt; + kstat_named_t zv_10ms_ops; + kstat_named_t zv_100ms_ops; + kstat_named_t zv_1s_ops; + kstat_named_t zv_10s_ops; + kstat_named_t zv_delay_cnt; + kstat_named_t zv_delay_time; +} zone_vfs_kstat_t; + +typedef struct { + kstat_named_t zz_zonename; + kstat_named_t zz_nread; + kstat_named_t zz_reads; + kstat_named_t zz_rtime; + kstat_named_t zz_rlentime; + kstat_named_t zz_nwritten; + kstat_named_t zz_writes; + kstat_named_t zz_waittime; +} zone_zfs_kstat_t; + +typedef struct { kstat_named_t zm_zonename; + kstat_named_t zm_rss; + kstat_named_t zm_phys_cap; + kstat_named_t zm_swap; + kstat_named_t zm_swap_cap; + kstat_named_t zm_nover; + kstat_named_t zm_pagedout; kstat_named_t zm_pgpgin; kstat_named_t zm_anonpgin; kstat_named_t zm_execpgin; kstat_named_t zm_fspgin; kstat_named_t zm_anon_alloc_fail; + kstat_named_t zm_pf_throttle; + kstat_named_t zm_pf_throttle_usec; } zone_mcap_kstat_t; typedef struct { @@ -420,8 +503,10 @@ typedef struct { kstat_named_t zm_ffnoproc; kstat_named_t zm_ffnomem; kstat_named_t zm_ffmisc; + kstat_named_t zm_mfseglim; kstat_named_t zm_nested_intp; kstat_named_t zm_init_pid; + kstat_named_t zm_init_restarts; kstat_named_t zm_boot_time; } zone_misc_kstat_t; @@ -464,6 +549,7 @@ typedef struct zone { */ list_node_t zone_linkage; zoneid_t 
zone_id; /* ID of zone */ + zoneid_t zone_did; /* persistent debug ID of zone */ uint_t zone_ref; /* count of zone_hold()s on zone */ uint_t zone_cred_ref; /* count of zone_hold_cred()s on zone */ /* @@ -516,10 +602,11 @@ typedef struct zone { kcondvar_t zone_cv; /* used to signal state changes */ struct proc *zone_zsched; /* Dummy kernel "zsched" process */ pid_t zone_proc_initpid; /* pid of "init" for this zone */ - char *zone_initname; /* fs path to 'init' */ + uint_t zone_proc_init_restarts; /* times init restarted */ + char *zone_initname; /* fs path to 'init' */ + int zone_init_status; /* init's exit status */ int zone_boot_err; /* for zone_boot() if boot fails */ char *zone_bootargs; /* arguments passed via zone_boot() */ - uint64_t zone_phys_mcap; /* physical memory cap */ /* * zone_kthreads is protected by zone_status_lock. */ @@ -559,9 +646,11 @@ typedef struct zone { boolean_t zone_restart_init; /* Restart init if it dies? */ boolean_t zone_reboot_on_init_exit; /* Reboot if init dies? */ boolean_t zone_restart_init_0; /* Restart only if it exits 0 */ + boolean_t zone_setup_app_contract; /* setup contract? */ struct brand *zone_brand; /* zone's brand */ void *zone_brand_data; /* store brand specific data */ id_t zone_defaultcid; /* dflt scheduling class id */ + boolean_t zone_fixed_hipri; /* fixed sched. hi prio */ kstat_t *zone_swapresv_kstat; kstat_t *zone_lockedmem_kstat; /* @@ -570,8 +659,24 @@ typedef struct zone { list_t zone_dl_list; netstack_t *zone_netstack; struct cpucap *zone_cpucap; /* CPU caps data */ + /* - * Solaris Auditing per-zone audit context + * kstats and counters for VFS ops and bytes. + */ + kmutex_t zone_vfs_lock; /* protects VFS statistics */ + kstat_t *zone_vfs_ksp; + kstat_io_t zone_vfs_rwstats; + zone_vfs_kstat_t *zone_vfs_stats; + + /* + * kstats for ZFS I/O ops and bytes. 
+ */ + kmutex_t zone_zfs_lock; /* protects ZFS statistics */ + kstat_t *zone_zfs_ksp; + zone_zfs_kstat_t *zone_zfs_stats; + + /* + * illumos Auditing per-zone audit context */ struct au_kcontext *zone_audit_kctxt; /* @@ -588,7 +693,11 @@ typedef struct zone { /* zone_rctls->rcs_lock */ kstat_t *zone_nprocs_kstat; - kmutex_t zone_mcap_lock; /* protects mcap statistics */ + /* + * kstats and counters for physical memory capping. + */ + kstat_t *zone_physmem_kstat; + kmutex_t zone_mcap_lock; /* protects mcap statistics */ kstat_t *zone_mcap_ksp; zone_mcap_kstat_t *zone_mcap_stats; uint64_t zone_pgpgin; /* pages paged in */ @@ -613,6 +722,8 @@ typedef struct zone { uint32_t zone_ffnomem; /* as_dup/memory error */ uint32_t zone_ffmisc; /* misc. other error */ + uint32_t zone_mfseglim; /* map failure (# segs limit) */ + uint32_t zone_nested_intp; /* nested interp. kstat */ struct loadavg_s zone_loadavg; /* loadavg for this zone */ @@ -640,6 +751,53 @@ typedef struct zone { } zone_t; /* + * Data and counters used for ZFS fair-share disk IO. + */ +typedef struct zone_zfs_io { + uint16_t zpers_zfs_io_pri; /* ZFS IO priority - 16k max */ + uint_t zpers_zfs_queued[2]; /* sync I/O enqueued count */ + sys_zio_cntr_t zpers_rd_ops; /* Counters for ZFS reads, */ + sys_zio_cntr_t zpers_wr_ops; /* writes, and */ + sys_zio_cntr_t zpers_lwr_ops; /* logical writes. */ + kstat_io_t zpers_zfs_rwstats; + uint64_t zpers_io_util; /* IO utilization metric */ + uint64_t zpers_zfs_rd_waittime; + uint8_t zpers_io_delay; /* IO delay on logical r/w */ + uint8_t zpers_zfs_weight; /* used to prevent starvation */ + uint8_t zpers_io_util_above_avg; /* IO util percent > avg. */ +} zone_zfs_io_t; + +/* + * "Persistent" zone data which can be accessed independently of the zone_t.
+ */ +typedef struct zone_persist { + kmutex_t zpers_zfs_lock; /* Protects zpers_zfsp references */ + zone_zfs_io_t *zpers_zfsp; /* ZFS fair-share IO data */ + uint8_t zpers_over; /* currently over cap */ + uint32_t zpers_pg_cnt; /* current RSS in pages */ + uint32_t zpers_pg_limit; /* current RSS limit in pages */ + uint32_t zpers_nover; /* # of times over phys. cap */ +#ifndef DEBUG + uint64_t zpers_pg_out; /* # pages flushed */ +#else + /* + * To conserve memory, some detailed kstats are only kept for DEBUG + * builds. + */ + uint64_t zpers_zfs_rd_waittime; + + uint64_t zpers_pg_anon; /* # clean anon pages flushed */ + uint64_t zpers_pg_anondirty; /* # dirty anon pages flushed */ + uint64_t zpers_pg_fs; /* # clean fs pages flushed */ + uint64_t zpers_pg_fsdirty; /* # dirty fs pages flushed */ +#endif +} zone_persist_t; + +typedef enum zone_pageout_op { + ZPO_DIRTY, ZPO_FS, ZPO_ANON, ZPO_ANONDIRTY +} zone_pageout_op_t; + +/* * Special value of zone_psetid to indicate that pools are disabled. */ #define ZONE_PS_INVAL PS_MYID @@ -668,6 +826,7 @@ extern zone_t *zone_find_by_name(char *); extern zone_t *zone_find_by_any_path(const char *, boolean_t); extern zone_t *zone_find_by_path(const char *); extern zoneid_t getzoneid(void); +extern zoneid_t getzonedid(void); extern zone_t *zone_find_by_id_nolock(zoneid_t); extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *); extern int zone_check_datalink(zoneid_t *, datalink_id_t); @@ -802,7 +961,7 @@ struct zsd_entry { * NOTE: Using the VN_ prefix, even though it's defined here in zone.h. * NOTE2: See above warning about ZONE_ROOTVP(). */ -#define VN_IS_CURZONEROOT(vp) (VN_CMP(vp, ZONE_ROOTVP())) +#define VN_IS_CURZONEROOT(vp) (VN_CMP(vp, ZONE_ROOTVP())) /* * Zone-safe version of thread_create() to be used when the caller wants to @@ -868,6 +1027,7 @@ extern int zone_ncpus_online_get(zone_t *); * Returns true if the named pool/dataset is visible in the current zone.
*/ extern int zone_dataset_visible(const char *, int *); +extern int zone_dataset_visible_inzone(zone_t *, const char *, int *); /* * zone version of kadmin() @@ -880,10 +1040,25 @@ extern void mount_completed(zone_t *); extern int zone_walk(int (*)(zone_t *, void *), void *); +struct page; +extern void zone_add_page(struct page *); +extern void zone_rm_page(struct page *); +extern void zone_pageout_stat(int, zone_pageout_op_t); +extern void zone_get_physmem_data(int, pgcnt_t *, pgcnt_t *); + +/* Interfaces for page scanning */ +extern uint_t zone_num_over_cap; +extern zone_persist_t zone_pdata[MAX_ZONES]; + extern rctl_hndl_t rc_zone_locked_mem; extern rctl_hndl_t rc_zone_max_swap; +extern rctl_hndl_t rc_zone_phys_mem; extern rctl_hndl_t rc_zone_max_lofi; +/* For publishing sysevents related to a particular zone */ +extern void zone_sysevent_publish(zone_t *, const char *, const char *, + nvlist_t *); + #endif /* _KERNEL */ #ifdef __cplusplus |