summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/sys
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/sys')
-rw-r--r--usr/src/uts/common/sys/Makefile12
-rw-r--r--usr/src/uts/common/sys/acct.h3
-rw-r--r--usr/src/uts/common/sys/auxv.h30
-rw-r--r--usr/src/uts/common/sys/brand.h102
-rw-r--r--usr/src/uts/common/sys/buf.h8
-rw-r--r--usr/src/uts/common/sys/contract/process.h6
-rw-r--r--usr/src/uts/common/sys/cpucaps.h5
-rw-r--r--usr/src/uts/common/sys/cpucaps_impl.h6
-rw-r--r--usr/src/uts/common/sys/cpuvar.h3
-rw-r--r--usr/src/uts/common/sys/cred.h1
-rw-r--r--usr/src/uts/common/sys/dktp/dadk.h2
-rw-r--r--usr/src/uts/common/sys/dld.h12
-rw-r--r--usr/src/uts/common/sys/dld_impl.h5
-rw-r--r--usr/src/uts/common/sys/dlpi.h11
-rw-r--r--usr/src/uts/common/sys/dls.h10
-rw-r--r--usr/src/uts/common/sys/dls_impl.h6
-rw-r--r--usr/src/uts/common/sys/dls_mgmt.h9
-rw-r--r--usr/src/uts/common/sys/dumpadm.h3
-rw-r--r--usr/src/uts/common/sys/dumphdr.h29
-rw-r--r--usr/src/uts/common/sys/elf.h44
-rw-r--r--usr/src/uts/common/sys/eventfd.h10
-rw-r--r--usr/src/uts/common/sys/exec.h20
-rw-r--r--usr/src/uts/common/sys/file.h11
-rw-r--r--usr/src/uts/common/sys/frameio.h107
-rw-r--r--usr/src/uts/common/sys/fs/fifonode.h16
-rw-r--r--usr/src/uts/common/sys/fs/hyprlofs.h91
-rw-r--r--usr/src/uts/common/sys/fs/hyprlofs_info.h174
-rw-r--r--usr/src/uts/common/sys/fs/sdev_impl.h1
-rw-r--r--usr/src/uts/common/sys/fs/tmp.h54
-rw-r--r--usr/src/uts/common/sys/fss.h1
-rw-r--r--usr/src/uts/common/sys/fx.h10
-rw-r--r--usr/src/uts/common/sys/gsqueue.h59
-rw-r--r--usr/src/uts/common/sys/ia.h1
-rw-r--r--usr/src/uts/common/sys/inotify.h153
-rw-r--r--usr/src/uts/common/sys/ipc_impl.h2
-rw-r--r--usr/src/uts/common/sys/ipd.h4
-rw-r--r--usr/src/uts/common/sys/iso/signal_iso.h3
-rw-r--r--usr/src/uts/common/sys/klwp.h11
-rw-r--r--usr/src/uts/common/sys/mac_client.h1
-rw-r--r--usr/src/uts/common/sys/mac_client_impl.h1
-rw-r--r--usr/src/uts/common/sys/mac_client_priv.h3
-rw-r--r--usr/src/uts/common/sys/mac_flow.h22
-rw-r--r--usr/src/uts/common/sys/mman.h1
-rw-r--r--usr/src/uts/common/sys/mntent.h2
-rw-r--r--usr/src/uts/common/sys/netconfig.h1
-rw-r--r--usr/src/uts/common/sys/neti.h2
-rw-r--r--usr/src/uts/common/sys/netstack.h3
-rw-r--r--usr/src/uts/common/sys/param.h2
-rw-r--r--usr/src/uts/common/sys/policy.h2
-rw-r--r--usr/src/uts/common/sys/poll_impl.h1
-rw-r--r--usr/src/uts/common/sys/proc.h10
-rw-r--r--usr/src/uts/common/sys/procfs.h7
-rw-r--r--usr/src/uts/common/sys/ptms.h19
-rw-r--r--usr/src/uts/common/sys/refhash.h8
-rw-r--r--usr/src/uts/common/sys/resource.h1
-rw-r--r--usr/src/uts/common/sys/rt.h11
-rw-r--r--usr/src/uts/common/sys/scsi/adapters/mpt_sas/mpi/mpi2_pci.h147
-rw-r--r--usr/src/uts/common/sys/scsi/generic/inquiry.h7
-rw-r--r--usr/src/uts/common/sys/scsi/targets/sddef.h10
-rw-r--r--usr/src/uts/common/sys/shm.h5
-rw-r--r--usr/src/uts/common/sys/shm_impl.h17
-rw-r--r--usr/src/uts/common/sys/signal.h5
-rw-r--r--usr/src/uts/common/sys/socket.h8
-rw-r--r--usr/src/uts/common/sys/socketvar.h27
-rw-r--r--usr/src/uts/common/sys/sockfilter.h10
-rw-r--r--usr/src/uts/common/sys/squeue.h14
-rw-r--r--usr/src/uts/common/sys/squeue_impl.h2
-rw-r--r--usr/src/uts/common/sys/stream.h1
-rw-r--r--usr/src/uts/common/sys/sunddi.h6
-rw-r--r--usr/src/uts/common/sys/systrace.h13
-rw-r--r--usr/src/uts/common/sys/termios.h18
-rw-r--r--usr/src/uts/common/sys/thread.h9
-rw-r--r--usr/src/uts/common/sys/time.h9
-rw-r--r--usr/src/uts/common/sys/timer.h26
-rw-r--r--usr/src/uts/common/sys/ts.h2
-rw-r--r--usr/src/uts/common/sys/uadmin.h3
-rw-r--r--usr/src/uts/common/sys/uio.h12
-rw-r--r--usr/src/uts/common/sys/user.h10
-rw-r--r--usr/src/uts/common/sys/vm.h3
-rw-r--r--usr/src/uts/common/sys/vm_usage.h7
-rw-r--r--usr/src/uts/common/sys/vmsystm.h8
-rw-r--r--usr/src/uts/common/sys/vnd.h141
-rw-r--r--usr/src/uts/common/sys/vnd_errno.h72
-rw-r--r--usr/src/uts/common/sys/vnode.h56
-rw-r--r--usr/src/uts/common/sys/zfd.h78
-rw-r--r--usr/src/uts/common/sys/zone.h201
86 files changed, 1913 insertions, 146 deletions
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 70c8342377..94922f459b 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -259,6 +259,7 @@ CHKHDRS= \
flock.h \
flock_impl.h \
fork.h \
+ frameio.h \
fss.h \
fsspriocntl.h \
fsid.h \
@@ -284,6 +285,7 @@ CHKHDRS= \
idmap.h \
ieeefp.h \
id_space.h \
+ inotify.h \
instance.h \
int_const.h \
int_fmtio.h \
@@ -496,6 +498,7 @@ CHKHDRS= \
rctl_impl.h \
rds.h \
reboot.h \
+ refhash.h \
refstr.h \
refstr_impl.h \
resource.h \
@@ -661,6 +664,8 @@ CHKHDRS= \
vmem.h \
vmem_impl.h \
vmsystm.h \
+ vnd.h \
+ vnd_errno.h \
vnic.h \
vnic_impl.h \
vnode.h \
@@ -678,6 +683,7 @@ CHKHDRS= \
watchpoint.h \
winlockio.h \
zcons.h \
+ zfd.h \
zone.h \
xti_inet.h \
xti_osi.h \
@@ -839,14 +845,14 @@ FSHDRS= \
autofs.h \
decomp.h \
dv_node.h \
- sdev_impl.h \
- sdev_plugin.h \
fifonode.h \
hsfs_isospec.h \
hsfs_node.h \
hsfs_rrip.h \
hsfs_spec.h \
hsfs_susp.h \
+ hyprlofs.h \
+ hyprlofs_info.h \
lofs_info.h \
lofs_node.h \
mntdata.h \
@@ -856,6 +862,8 @@ FSHDRS= \
pc_label.h \
pc_node.h \
pxfs_ki.h \
+ sdev_impl.h \
+ sdev_plugin.h \
snode.h \
swapnode.h \
tmp.h \
diff --git a/usr/src/uts/common/sys/acct.h b/usr/src/uts/common/sys/acct.h
index f00884681b..e01ad61025 100644
--- a/usr/src/uts/common/sys/acct.h
+++ b/usr/src/uts/common/sys/acct.h
@@ -22,6 +22,7 @@
/*
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -88,7 +89,7 @@ extern int acct(const char *);
#if defined(_KERNEL)
-void acct(char);
+void acct(int);
int sysacct(char *);
struct vnode;
diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h
index e591320025..203b884cd3 100644
--- a/usr/src/uts/common/sys/auxv.h
+++ b/usr/src/uts/common/sys/auxv.h
@@ -79,6 +79,9 @@ typedef struct {
#define AT_FLAGS 8 /* processor flags */
#define AT_ENTRY 9 /* a.out entry point */
+/* First introduced on Linux */
+#define AT_RANDOM 25 /* address of 16 random bytes */
+
/*
* These relate to the original PPC ABI document; Linux reused
* the values for other things (see below), so disambiguation of
@@ -91,19 +94,18 @@ typedef struct {
* These are the values from LSB 1.3, the first five are also described
* in the draft amd64 ABI.
*
- * At the time of writing, Solaris doesn't place any of these values into
- * the aux vector, except AT_CLKTCK which is placed on the aux vector for
- * lx branded processes; also, we do similar things via AT_SUN_ values.
+ * At the time of writing, illumos doesn't place any of these values into the
+ * aux vector, except where noted. We do similar things via AT_SUN_ values.
*
* AT_NOTELF 10 program is not ELF?
- * AT_UID 11 real user id
- * AT_EUID 12 effective user id
- * AT_GID 13 real group id
- * AT_EGID 14 effective group id
+ * AT_UID 11 real user id (provided in LX)
+ * AT_EUID 12 effective user id (provided in LX)
+ * AT_GID 13 real group id (provided in LX)
+ * AT_EGID 14 effective group id (provided in LX)
*
* AT_PLATFORM 15
* AT_HWCAP 16
- * AT_CLKTCK 17 c.f. _SC_CLK_TCK
+ * AT_CLKTCK 17 c.f. _SC_CLK_TCK (provided in LX)
* AT_FPUCW 18
*
* AT_DCACHEBSIZE 19 (moved from 10)
@@ -111,6 +113,16 @@ typedef struct {
* AT_UCACHEBSIZE 21 (moved from 12)
*
* AT_IGNOREPPC 22
+ *
+ * On Linux:
+ * AT_* values 18 through 22 are reserved
+ * AT_SECURE 23 secure mode boolean (provided in LX)
+ * AT_BASE_PLATFORM 24 string identifying real platform, may
+ * differ from AT_PLATFORM.
+ * AT_HWCAP2 26 extension of AT_HWCAP
+ * AT_EXECFN 31 filename of program
+ * AT_SYSINFO 32
+ * AT_SYSINFO_EHDR 33 The vDSO location
*/
/*
@@ -190,6 +202,8 @@ extern uint_t getisax(uint32_t *, uint_t);
#define AT_SUN_BRAND_AUX1 2020
#define AT_SUN_BRAND_AUX2 2021
#define AT_SUN_BRAND_AUX3 2022
+#define AT_SUN_BRAND_AUX4 2025
+#define AT_SUN_BRAND_NROOT 2024
/*
* Aux vector for comm page
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
index a2feda573d..bace1142f9 100644
--- a/usr/src/uts/common/sys/brand.h
+++ b/usr/src/uts/common/sys/brand.h
@@ -103,28 +103,106 @@ struct brand_mach_ops;
struct intpdata;
struct execa;
+/*
+ * Common structure to define hooks for brand operation.
+ *
+ * Required Fields:
+ * b_init_brand_data - Setup zone brand data during zone_setbrand
+ * b_free_brand_data - Free zone brand data during zone_destroy
+ * b_brandsys - Syscall handler for brandsys
+ * b_setbrand - Initialize process brand data
+ * b_getattr - Get brand-custom zone attribute
+ * b_setattr - Set brand-custom zone attribute
+ * b_copy_procdata - Copy process brand data during fork
+ * b_proc_exit - Perform process brand exit processing
+ * b_exec - Reset branded process state on exec
+ * b_lwp_setrval - Set return code for forked child
+ * b_initlwp - Initialize lwp brand data (cannot drop p->p_lock)
+ * b_forklwp - Copy lwp brand data during fork
+ * b_freelwp - Free lwp brand data
+ * b_lwpexit - Perform lwp-specific brand exit processing
+ * b_elfexec - Load and execute ELF binary
+ * b_sigset_native_to_brand - Convert sigset native->brand
+ * b_sigset_brand_to_native - Convert sigset brand->native
+ * b_nsig - Maxiumum signal number
+ * b_sendsig - Update process state after sendsig
+ *
+ * Optional Fields:
+ * b_lwpdata_alloc - Speculatively allocate data for use in b_initlwp
+ * b_lwpdata_free - Free data from allocated by b_lwpdata_alloc if errors occur
+ * during lwp creation before b_initlwp could be called.
+ * b_initlwp_post - Complete lwp branding (can temporarily drop p->p_lock)
+ * b_exit_with_sig - Instead of sending SIGCLD, exit with custom behavior
+ * b_psig_to_proc - Custom additional behavior during psig
+ * b_wait_filter - Filter processes from being matched by waitid
+ * b_native_exec - Provide interpreter path prefix for executables
+ * b_ptrace_exectrap - Custom behavior for legacy ptrace traps
+ * b_map32limit - Specify alternate limit for MAP_32BIT mappings
+ * b_stop_notify - Hook process stop events
+ * b_waitid_helper - Generate synthetic results for waitid
+ * b_sigcld_repost - Post synthetic SIGCLD signals
+ * b_issig_stop - Alter/suppress signal delivery during issig
+ * b_sig_ignorable - Disallow discarding of signals
+ * b_savecontext - Alter context during savecontext
+ * b_restorecontext - Alter context during restorecontext
+ * b_sendsig_stack - Override stack used for signal delivery
+ * b_setid_clear - Override setid_clear behavior
+ * b_pagefault - Trap pagefault events
+ * b_intp_parse_arg - Controls interpreter argument handling (allow 1 or all)
+ * b_clearbrand - Perform any actions necessary when clearing the brand.
+ * b_rpc_statd - Upcall to rpc.statd running within the zone
+ * b_acct_out - Output properly formatted accounting record
+ */
struct brand_ops {
- void (*b_init_brand_data)(zone_t *);
+ void (*b_init_brand_data)(zone_t *, kmutex_t *);
void (*b_free_brand_data)(zone_t *);
int (*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t);
void (*b_setbrand)(struct proc *);
int (*b_getattr)(zone_t *, int, void *, size_t *);
int (*b_setattr)(zone_t *, int, void *, size_t);
void (*b_copy_procdata)(struct proc *, struct proc *);
- void (*b_proc_exit)(struct proc *, klwp_t *);
+ void (*b_proc_exit)(struct proc *);
void (*b_exec)();
void (*b_lwp_setrval)(klwp_t *, int, int);
- int (*b_initlwp)(klwp_t *);
+ void *(*b_lwpdata_alloc)(struct proc *);
+ void (*b_lwpdata_free)(void *);
+ void (*b_initlwp)(klwp_t *, void *);
+ void (*b_initlwp_post)(klwp_t *);
void (*b_forklwp)(klwp_t *, klwp_t *);
void (*b_freelwp)(klwp_t *);
void (*b_lwpexit)(klwp_t *);
int (*b_elfexec)(struct vnode *, struct execa *, struct uarg *,
- struct intpdata *, int, size_t *, int, caddr_t, struct cred *,
- int);
+ struct intpdata *, int, size_t *, int, caddr_t, struct cred *,
+ int *);
void (*b_sigset_native_to_brand)(sigset_t *);
void (*b_sigset_brand_to_native)(sigset_t *);
+ void (*b_sigfd_translate)(k_siginfo_t *);
int b_nsig;
+ void (*b_exit_with_sig)(proc_t *, sigqueue_t *);
+ boolean_t (*b_wait_filter)(proc_t *, proc_t *);
+ boolean_t (*b_native_exec)(uint8_t, const char **);
+ uint32_t (*b_map32limit)(proc_t *);
+ void (*b_stop_notify)(proc_t *, klwp_t *, ushort_t, ushort_t);
+ int (*b_waitid_helper)(idtype_t, id_t, k_siginfo_t *, int,
+ boolean_t *, int *);
+ int (*b_sigcld_repost)(proc_t *, sigqueue_t *);
+ int (*b_issig_stop)(proc_t *, klwp_t *);
+ boolean_t (*b_sig_ignorable)(proc_t *, klwp_t *, int);
+ void (*b_savecontext)(ucontext_t *);
+#if defined(_SYSCALL32_IMPL)
+ void (*b_savecontext32)(ucontext32_t *);
+#endif
+ void (*b_restorecontext)(ucontext_t *);
+ caddr_t (*b_sendsig_stack)(int);
+ void (*b_sendsig)(int);
+ int (*b_setid_clear)(vattr_t *vap, cred_t *cr);
+ int (*b_pagefault)(proc_t *, klwp_t *, caddr_t, enum fault_type,
+ enum seg_rw);
+ boolean_t b_intp_parse_arg;
+ void (*b_clearbrand)(proc_t *, boolean_t);
+ void (*b_rpc_statd)(int, void *, void *);
+ void (*b_acct_out)(struct vnode *, int);
};
/*
@@ -135,6 +213,7 @@ typedef struct brand {
char *b_name;
struct brand_ops *b_ops;
struct brand_mach_ops *b_machops;
+ size_t b_data_size;
} brand_t;
extern brand_t native_brand;
@@ -165,7 +244,7 @@ extern brand_t *brand_register_zone(struct brand_attr *);
extern brand_t *brand_find_name(char *);
extern void brand_unregister_zone(brand_t *);
extern int brand_zone_count(brand_t *);
-extern void brand_setbrand(proc_t *);
+extern int brand_setbrand(proc_t *, boolean_t);
extern void brand_clearbrand(proc_t *, boolean_t);
/*
@@ -178,17 +257,16 @@ extern int brand_solaris_cmd(int, uintptr_t, uintptr_t, uintptr_t,
extern void brand_solaris_copy_procdata(proc_t *, proc_t *,
struct brand *);
extern int brand_solaris_elfexec(vnode_t *, execa_t *, uarg_t *,
- intpdata_t *, int, size_t *, int, caddr_t, cred_t *, int,
- struct brand *, char *, char *, char *, char *, char *);
+ intpdata_t *, int, size_t *, int, caddr_t, cred_t *, int *,
+ struct brand *, char *, char *, char *);
extern void brand_solaris_exec(struct brand *);
extern int brand_solaris_fini(char **, struct modlinkage *,
struct brand *);
extern void brand_solaris_forklwp(klwp_t *, klwp_t *, struct brand *);
extern void brand_solaris_freelwp(klwp_t *, struct brand *);
-extern int brand_solaris_initlwp(klwp_t *, struct brand *);
+extern void brand_solaris_initlwp(klwp_t *, struct brand *);
extern void brand_solaris_lwpexit(klwp_t *, struct brand *);
-extern void brand_solaris_proc_exit(struct proc *, klwp_t *,
- struct brand *);
+extern void brand_solaris_proc_exit(struct proc *, struct brand *);
extern void brand_solaris_setbrand(proc_t *, struct brand *);
#if defined(_SYSCALL32)
diff --git a/usr/src/uts/common/sys/buf.h b/usr/src/uts/common/sys/buf.h
index e20e0e0c35..b6b5c20e44 100644
--- a/usr/src/uts/common/sys/buf.h
+++ b/usr/src/uts/common/sys/buf.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012 Joyent, Inc. All rights reserved.
*
* Copyright 2017 RackTop Systems.
*/
@@ -188,6 +189,7 @@ struct biostats {
#define B_STARTED 0x2000000 /* io:::start probe called for buf */
#define B_ABRWRITE 0x4000000 /* Application based recovery active */
#define B_PAGE_NOWAIT 0x8000000 /* Skip the page if it is locked */
+#define B_INVALCURONLY 0x10000000 /* invalidate only for curproc */
/*
* There is some confusion over the meaning of B_FREE and B_INVAL and what
@@ -200,6 +202,12 @@ struct biostats {
* between the sole use of these two flags. In both cases, IO will be done
* if the page is not yet committed to storage.
*
+ * The B_INVALCURONLY flag modifies the behavior of the B_INVAL flag and is
+ * intended to be used in conjunction with B_INVAL. B_INVALCURONLY has no
+ * meaning on its own. When both B_INVALCURONLY and B_INVAL are set, then
+ * the mapping for the page is only invalidated for the current process.
+ * In this case, the page is not destroyed unless this was the final mapping.
+ *
* In order to discard pages without writing them back, (B_INVAL | B_TRUNC)
* should be used.
*
diff --git a/usr/src/uts/common/sys/contract/process.h b/usr/src/uts/common/sys/contract/process.h
index 21cf94dcf9..2c70d7c9f1 100644
--- a/usr/src/uts/common/sys/contract/process.h
+++ b/usr/src/uts/common/sys/contract/process.h
@@ -21,13 +21,12 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_CONTRACT_PROCESS_H
#define _SYS_CONTRACT_PROCESS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/contract.h>
#include <sys/time.h>
@@ -55,7 +54,8 @@ typedef struct cont_process cont_process_t;
#define CT_PR_NOORPHAN 0x2 /* kill when contract is abandoned */
#define CT_PR_PGRPONLY 0x4 /* only kill process group on fatal errors */
#define CT_PR_REGENT 0x8 /* automatically detach inherited contracts */
-#define CT_PR_ALLPARAM 0xf
+#define CT_PR_KEEP_EXEC 0x10 /* preserve template accross exec */
+#define CT_PR_ALLPARAM 0x1f
/*
* ctr_ev_* flags
diff --git a/usr/src/uts/common/sys/cpucaps.h b/usr/src/uts/common/sys/cpucaps.h
index 6063ff4380..6bc042108c 100644
--- a/usr/src/uts/common/sys/cpucaps.h
+++ b/usr/src/uts/common/sys/cpucaps.h
@@ -22,6 +22,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011, 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_CPUCAPS_H
@@ -84,12 +85,16 @@ extern void cpucaps_zone_remove(zone_t *);
*/
extern int cpucaps_project_set(kproject_t *, rctl_qty_t);
extern int cpucaps_zone_set(zone_t *, rctl_qty_t);
+extern int cpucaps_zone_set_base(zone_t *, rctl_qty_t);
+extern int cpucaps_zone_set_burst_time(zone_t *, rctl_qty_t);
/*
* Get current CPU usage for a project/zone.
*/
extern rctl_qty_t cpucaps_project_get(kproject_t *);
extern rctl_qty_t cpucaps_zone_get(zone_t *);
+extern rctl_qty_t cpucaps_zone_get_base(zone_t *);
+extern rctl_qty_t cpucaps_zone_get_burst_time(zone_t *);
/*
* Scheduling class hooks into CPU caps framework.
diff --git a/usr/src/uts/common/sys/cpucaps_impl.h b/usr/src/uts/common/sys/cpucaps_impl.h
index 95afd21827..2cd4ed644d 100644
--- a/usr/src/uts/common/sys/cpucaps_impl.h
+++ b/usr/src/uts/common/sys/cpucaps_impl.h
@@ -22,6 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011, 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_CPUCAPS_IMPL_H
@@ -66,8 +67,12 @@ typedef struct cpucap {
waitq_t cap_waitq; /* waitq for capped threads */
kstat_t *cap_kstat; /* cpucaps specific kstat */
int64_t cap_gen; /* zone cap specific */
+ hrtime_t cap_chk_value; /* effective CPU usage cap */
hrtime_t cap_value; /* scaled CPU usage cap */
hrtime_t cap_usage; /* current CPU usage */
+ hrtime_t cap_base; /* base CPU for burst */
+ u_longlong_t cap_burst_limit; /* max secs (in tics) for a burst */
+ u_longlong_t cap_bursting; /* # of ticks currently bursting */
disp_lock_t cap_usagelock; /* protects cap_usage above */
/*
* Per cap statistics.
@@ -75,6 +80,7 @@ typedef struct cpucap {
hrtime_t cap_maxusage; /* maximum cap usage */
u_longlong_t cap_below; /* # of ticks spend below the cap */
u_longlong_t cap_above; /* # of ticks spend above the cap */
+ u_longlong_t cap_above_base; /* # of ticks spent above the base */
} cpucap_t;
/*
diff --git a/usr/src/uts/common/sys/cpuvar.h b/usr/src/uts/common/sys/cpuvar.h
index 7b153a3e9e..24adbb7418 100644
--- a/usr/src/uts/common/sys/cpuvar.h
+++ b/usr/src/uts/common/sys/cpuvar.h
@@ -390,6 +390,8 @@ extern cpu_core_t cpu_core[];
#define CPU_DISP_DONTSTEAL 0x01 /* CPU undergoing context swtch */
#define CPU_DISP_HALTED 0x02 /* CPU halted waiting for interrupt */
+/* Note: inside ifdef: _KERNEL || _KMEMUSER || _BOOT */
+
/*
* Macros for manipulating sets of CPUs as a bitmap. Note that this
* bitmap may vary in size depending on the maximum CPU id a specific
@@ -512,6 +514,7 @@ extern struct cpu **cpu_seq; /* indexed by sequential CPU id */
extern cpu_t *cpu_list; /* list of CPUs */
extern cpu_t *cpu_active; /* list of active CPUs */
extern cpuset_t cpu_active_set; /* cached set of active CPUs */
+extern cpuset_t cpu_available; /* cached set of available CPUs */
extern int ncpus; /* number of CPUs present */
extern int ncpus_online; /* number of CPUs not quiesced */
extern int ncpus_intr_enabled; /* nr of CPUs taking I/O intrs */
diff --git a/usr/src/uts/common/sys/cred.h b/usr/src/uts/common/sys/cred.h
index fb79dfecde..1f938132e0 100644
--- a/usr/src/uts/common/sys/cred.h
+++ b/usr/src/uts/common/sys/cred.h
@@ -93,6 +93,7 @@ extern gid_t crgetgid(const cred_t *);
extern gid_t crgetrgid(const cred_t *);
extern gid_t crgetsgid(const cred_t *);
extern zoneid_t crgetzoneid(const cred_t *);
+extern zoneid_t crgetzonedid(const cred_t *);
extern projid_t crgetprojid(const cred_t *);
extern cred_t *crgetmapped(const cred_t *);
diff --git a/usr/src/uts/common/sys/dktp/dadk.h b/usr/src/uts/common/sys/dktp/dadk.h
index f5c990e7c0..2178ad1f0d 100644
--- a/usr/src/uts/common/sys/dktp/dadk.h
+++ b/usr/src/uts/common/sys/dktp/dadk.h
@@ -65,6 +65,8 @@ struct dadk {
kstat_t *dad_errstats; /* error stats */
kmutex_t dad_cmd_mutex;
int dad_cmd_count;
+ uint32_t dad_err_cnt; /* number of recent errors */
+ hrtime_t dad_last_log; /* time of last error log */
};
#define DAD_SECSIZ dad_phyg.g_secsiz
diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h
index de7ac46db4..b73d22249a 100644
--- a/usr/src/uts/common/sys/dld.h
+++ b/usr/src/uts/common/sys/dld.h
@@ -192,6 +192,7 @@ typedef struct dld_ioc_rename {
datalink_id_t dir_linkid1;
datalink_id_t dir_linkid2;
char dir_link[MAXLINKNAMELEN];
+ boolean_t dir_zoneinit;
} dld_ioc_rename_t;
/*
@@ -204,6 +205,7 @@ typedef struct dld_ioc_rename {
typedef struct dld_ioc_zid {
zoneid_t diz_zid;
datalink_id_t diz_linkid;
+ boolean_t diz_transient;
} dld_ioc_zid_t;
/*
@@ -356,6 +358,7 @@ typedef struct dld_ioc_led {
#define DLD_CAPAB_POLL 0x00000002
#define DLD_CAPAB_PERIM 0x00000003
#define DLD_CAPAB_LSO 0x00000004
+#define DLD_CAPAB_IPCHECK 0x00000005
#define DLD_ENABLE 0x00000001
#define DLD_DISABLE 0x00000002
@@ -382,6 +385,7 @@ typedef struct dld_ioc_led {
*/
typedef int (*dld_capab_func_t)(void *, uint_t, void *, uint_t);
+#define DI_DIRECT_RAW 0x1
/*
* Direct Tx/Rx capability.
*/
@@ -406,8 +410,16 @@ typedef struct dld_capab_direct_s {
/* flow control "can I put on a ring" callback */
uintptr_t di_tx_fctl_df; /* canput-like callback */
void *di_tx_fctl_dh;
+
+ /* flags that control our behavior */
+ uint_t di_flags;
} dld_capab_direct_t;
+typedef struct dld_capab_ipcheck_s {
+ uintptr_t ipc_allowed_df;
+ void *ipc_allowed_dh;
+} dld_capab_ipcheck_t;
+
/*
* Polling/softring capability.
*/
diff --git a/usr/src/uts/common/sys/dld_impl.h b/usr/src/uts/common/sys/dld_impl.h
index 035eea893a..336fa9cb67 100644
--- a/usr/src/uts/common/sys/dld_impl.h
+++ b/usr/src/uts/common/sys/dld_impl.h
@@ -53,7 +53,8 @@ typedef enum {
typedef enum {
DLD_UNINITIALIZED,
DLD_PASSIVE,
- DLD_ACTIVE
+ DLD_ACTIVE,
+ DLD_EXCLUSIVE
} dld_passivestate_t;
/*
@@ -256,6 +257,8 @@ extern void dld_str_rx_unitdata(void *, mac_resource_handle_t,
extern void dld_str_notify_ind(dld_str_t *);
extern mac_tx_cookie_t str_mdata_fastpath_put(dld_str_t *, mblk_t *,
uintptr_t, uint16_t);
+extern mac_tx_cookie_t str_mdata_raw_fastpath_put(dld_str_t *, mblk_t *,
+ uintptr_t, uint16_t);
extern int dld_flow_ctl_callb(dld_str_t *, uint64_t,
int (*func)(), void *);
diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h
index e9ac27cddd..e71a55ab84 100644
--- a/usr/src/uts/common/sys/dlpi.h
+++ b/usr/src/uts/common/sys/dlpi.h
@@ -109,6 +109,7 @@ typedef struct dl_ipnetinfo {
#define DL_PASSIVE_REQ 0x114 /* Allow access to aggregated link */
#define DL_INTR_MODE_REQ 0x115 /* Request Rx processing in INTR mode */
#define DL_NOTIFY_CONF 0x116 /* Notification from upstream */
+#define DL_EXCLUSIVE_REQ 0x117 /* Make bind active */
/*
* Primitives used for Connectionless Service
@@ -391,6 +392,7 @@ typedef struct dl_ipnetinfo {
#define DL_PROMISC_SAP 0x02 /* promiscuous mode at sap level */
#define DL_PROMISC_MULTI 0x03 /* promiscuous mode for multicast */
#define DL_PROMISC_RX_ONLY 0x04 /* above only enabled for rx */
+#define DL_PROMISC_FIXUPS 0x05 /* above will be fixed up */
/*
* DLPI notification codes for DL_NOTIFY_REQ primitives.
@@ -1085,6 +1087,13 @@ typedef struct {
} dl_intr_mode_req_t;
/*
+ * DL_EXCLUSIVE_REQ, M_PROTO type
+ */
+typedef struct {
+ t_uscalar_t dl_primitive;
+} dl_exclusive_req_t;
+
+/*
* CONNECTION-ORIENTED SERVICE PRIMITIVES
*/
@@ -1506,6 +1515,7 @@ union DL_primitives {
dl_control_ack_t control_ack;
dl_passive_req_t passive_req;
dl_intr_mode_req_t intr_mode_req;
+ dl_exclusive_req_t exclusive_req;
};
#define DL_INFO_REQ_SIZE sizeof (dl_info_req_t)
@@ -1574,6 +1584,7 @@ union DL_primitives {
#define DL_CONTROL_ACK_SIZE sizeof (dl_control_ack_t)
#define DL_PASSIVE_REQ_SIZE sizeof (dl_passive_req_t)
#define DL_INTR_MODE_REQ_SIZE sizeof (dl_intr_mode_req_t)
+#define DL_EXCLUSIVE_REQ_SIZE sizeof (dl_exclusive_req_t)
#ifdef _KERNEL
/*
diff --git a/usr/src/uts/common/sys/dls.h b/usr/src/uts/common/sys/dls.h
index cd3749dc21..0c5ffb0dd7 100644
--- a/usr/src/uts/common/sys/dls.h
+++ b/usr/src/uts/common/sys/dls.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_DLS_H
@@ -86,6 +87,7 @@ typedef struct dls_link_s dls_link_t;
#define DLS_PROMISC_MULTI 0x00000002
#define DLS_PROMISC_PHYS 0x00000004
#define DLS_PROMISC_RX_ONLY 0x00000008
+#define DLS_PROMISC_FIXUPS 0x00000010
extern int dls_open(dls_link_t *, dls_dl_handle_t, dld_str_t *);
extern void dls_close(dld_str_t *);
@@ -107,11 +109,13 @@ extern void str_notify(void *, mac_notify_type_t);
extern int dls_devnet_open(const char *,
dls_dl_handle_t *, dev_t *);
+extern int dls_devnet_open_in_zone(const char *,
+ dls_dl_handle_t *, dev_t *, zoneid_t);
extern void dls_devnet_close(dls_dl_handle_t);
extern boolean_t dls_devnet_rebuild();
extern int dls_devnet_rename(datalink_id_t, datalink_id_t,
- const char *);
+ const char *, boolean_t);
extern int dls_devnet_create(mac_handle_t, datalink_id_t,
zoneid_t);
extern int dls_devnet_destroy(mac_handle_t, datalink_id_t *,
@@ -129,7 +133,7 @@ extern uint16_t dls_devnet_vid(dls_dl_handle_t);
extern datalink_id_t dls_devnet_linkid(dls_dl_handle_t);
extern int dls_devnet_dev2linkid(dev_t, datalink_id_t *);
extern int dls_devnet_phydev(datalink_id_t, dev_t *);
-extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t);
+extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t, boolean_t);
extern zoneid_t dls_devnet_getzid(dls_dl_handle_t);
extern zoneid_t dls_devnet_getownerzid(dls_dl_handle_t);
extern boolean_t dls_devnet_islinkvisible(datalink_id_t, zoneid_t);
@@ -143,6 +147,8 @@ extern int dls_mgmt_update(const char *, uint32_t, boolean_t,
extern int dls_mgmt_get_linkinfo(datalink_id_t, char *,
datalink_class_t *, uint32_t *, uint32_t *);
extern int dls_mgmt_get_linkid(const char *, datalink_id_t *);
+extern int dls_mgmt_get_linkid_in_zone(const char *,
+ datalink_id_t *, zoneid_t);
extern datalink_id_t dls_mgmt_get_next(datalink_id_t, datalink_class_t,
datalink_media_t, uint32_t);
extern int dls_devnet_macname2linkid(const char *,
diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h
index cd13a41413..329f8dd08e 100644
--- a/usr/src/uts/common/sys/dls_impl.h
+++ b/usr/src/uts/common/sys/dls_impl.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_DLS_IMPL_H
@@ -62,6 +63,7 @@ struct dls_link_s { /* Protected by */
uint_t dl_zone_ref;
link_tagmode_t dl_tagmode; /* atomic */
uint_t dl_nonip_cnt; /* SL */
+ uint_t dl_exclusive; /* SL */
};
typedef struct dls_head_s {
@@ -97,7 +99,8 @@ extern void dls_create_str_kstats(dld_str_t *);
extern int dls_stat_update(kstat_t *, dls_link_t *, int);
extern int dls_stat_create(const char *, int, const char *,
zoneid_t, int (*)(struct kstat *, int), void *,
- kstat_t **);
+ kstat_t **, zoneid_t);
+extern void dls_stat_delete(kstat_t *);
extern int dls_devnet_open_by_dev(dev_t, dls_link_t **,
dls_dl_handle_t *);
@@ -129,6 +132,7 @@ extern void dls_mgmt_init(void);
extern void dls_mgmt_fini(void);
extern int dls_mgmt_get_phydev(datalink_id_t, dev_t *);
+extern int dls_exclusive_set(dld_str_t *, boolean_t);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/dls_mgmt.h b/usr/src/uts/common/sys/dls_mgmt.h
index b60e53b267..6fec277991 100644
--- a/usr/src/uts/common/sys/dls_mgmt.h
+++ b/usr/src/uts/common/sys/dls_mgmt.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2015, Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _DLS_MGMT_H
@@ -114,10 +114,14 @@ typedef uint64_t datalink_media_t;
#define DLMGMT_CMD_BASE 128
/*
- * Indicate the link mapping is active or persistent
+ * Indicate if the link mapping is active, persistent, or transient. A
+ * transient link is an active link with a twist -- it is an active
+ * link which is destroyed along with the zone rather than reassigned
+ * to the GZ.
*/
#define DLMGMT_ACTIVE 0x01
#define DLMGMT_PERSIST 0x02
+#define DLMGMT_TRANSIENT 0x04
/* upcall argument */
typedef struct dlmgmt_door_arg {
@@ -168,6 +172,7 @@ typedef struct dlmgmt_door_getname {
typedef struct dlmgmt_door_getlinkid {
int ld_cmd;
char ld_link[MAXLINKNAMELEN];
+ zoneid_t ld_zoneid;
} dlmgmt_door_getlinkid_t;
typedef struct dlmgmt_door_getnext_s {
diff --git a/usr/src/uts/common/sys/dumpadm.h b/usr/src/uts/common/sys/dumpadm.h
index 616828bb2b..8ca10ff3c5 100644
--- a/usr/src/uts/common/sys/dumpadm.h
+++ b/usr/src/uts/common/sys/dumpadm.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_DUMPADM_H
@@ -44,11 +45,13 @@ extern "C" {
#define DIOCSETUUID (DDIOC | 0x17)
#define DIOCGETUUID (DDIOC | 0x18)
#define DIOCRMDEV (DDIOC | 0x19)
+#define DIOCSCRYPTKEY (DDIOC | 0x1a)
/*
* Kernel-controlled dump state flags for dump_conflags
*/
#define DUMP_EXCL 0x00000001 /* dedicated dump device (not swap) */
+#define DUMP_ENCRYPT 0x00000002 /* encrypt dump */
#define DUMP_STATE 0x0000ffff /* the set of all kernel flags */
/*
diff --git a/usr/src/uts/common/sys/dumphdr.h b/usr/src/uts/common/sys/dumphdr.h
index 2019f60a5d..aa2fbde7a5 100644
--- a/usr/src/uts/common/sys/dumphdr.h
+++ b/usr/src/uts/common/sys/dumphdr.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 by Delphix. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _SYS_DUMPHDR_H
@@ -60,6 +61,22 @@ extern "C" {
sizeof (summary_dump_t) + 1024), \
DUMP_OFFSET)) /* summary save area */
+#define DUMP_CRYPT_KEYLEN 32 /* byte len for crypto key */
+#define DUMP_CRYPT_NONCELEN 8 /* byte len for nonce */
+#define DUMP_CRYPT_HMACLEN 64 /* byte len for HMAC */
+#define DUMP_CRYPT_BLOCKSHIFT 6 /* 64-byte blocks */
+
+#define DUMP_CRYPT_ALGO_NONE 0 /* dump not encrypted */
+#define DUMP_CRYPT_ALGO_CHACHA20 1 /* ChaCha20 */
+
+#if DUMP_OFFSET & ((1 << DUMP_CRYPT_BLOCKSHIFT) - 1)
+#error DUMP_OFFSET not DUMP_CRYPT_BLOCKSHIFT aligned
+#endif
+
+#if DUMP_LOGSIZE & ((1 << DUMP_CRYPT_BLOCKSHIFT) - 1)
+#error DUMP_LOGSIZE not DUMP_CRYPT_BLOCKSHIFT aligned
+#endif
+
typedef struct dumphdr {
uint32_t dump_magic; /* magic number */
uint32_t dump_version; /* version number */
@@ -86,12 +103,22 @@ typedef struct dumphdr {
} dumphdr_t;
/*
+ * If DF_ENCRYPTED is set, this header will be found after the dumphdr.
+ */
+typedef struct dump_crypt {
+ uint8_t dump_crypt_algo; /* encryption algorithm */
+ uint8_t dump_crypt_hmac[DUMP_CRYPT_HMACLEN]; /* HMAC for crypto key */
+ uint8_t dump_crypt_nonce[DUMP_CRYPT_NONCELEN]; /* encryption none */
+} dump_crypt_t;
+
+/*
* Values for dump_flags
*/
#define DF_VALID 0x00000001 /* Dump is valid (savecore clears) */
#define DF_COMPLETE 0x00000002 /* All pages present as configured */
#define DF_LIVE 0x00000004 /* Dump was taken on a live system */
#define DF_COMPRESSED 0x00000008 /* Dump is compressed */
+#define DF_ENCRYPTED 0x00000010 /* Dump is encrypted */
#define DF_KERNEL 0x00010000 /* Contains kernel pages only */
#define DF_ALL 0x00020000 /* Contains all pages */
#define DF_CURPROC 0x00040000 /* Contains kernel + cur proc pages */
@@ -175,6 +202,8 @@ extern u_offset_t dumpvp_size;
extern struct dumphdr *dumphdr;
extern int dump_conflags;
extern char *dumppath;
+extern uint8_t dump_crypt_key[DUMP_CRYPT_KEYLEN];
+extern uint8_t dump_crypt_nonce[DUMP_CRYPT_NONCELEN];
extern int dump_timeout;
extern int dump_timeleft;
diff --git a/usr/src/uts/common/sys/elf.h b/usr/src/uts/common/sys/elf.h
index 1f290c282c..556a49c60f 100644
--- a/usr/src/uts/common/sys/elf.h
+++ b/usr/src/uts/common/sys/elf.h
@@ -501,6 +501,11 @@ typedef struct {
#define PT_GNU_STACK 0x6474e551 /* Indicates stack executability */
#define PT_GNU_RELRO 0x6474e552 /* Read-only after relocation */
+/*
+ * Linux specific program headers not even used by Linux (!!)
+ */
+#define PT_PAX_FLAGS 0x65041580 /* PaX flags (see below) */
+
#define PT_LOSUNW 0x6ffffffa
#define PT_SUNWBSS 0x6ffffffa /* Sun Specific segment (unused) */
#define PT_SUNWSTACK 0x6ffffffb /* describes the stack segment */
@@ -516,6 +521,45 @@ typedef struct {
#define PF_W 0x2
#define PF_X 0x1
+/*
+ * PaX is a regrettable series of never-integrated Linux patches for a
+ * facility to provide additional protections on memory pages for purposes of
+ * increasing security, and for allowing binaries to demand (or refuse) those
+ * protections via the PT_PAX_FLAGS program header. (Portents of its
+ * rudderless existence, "PaX" is a term of indefinite origin written by an
+ * unknown group of people.) This facility is unfortunate in any number of
+ * ways, and was largely obviated by the broad adoption of non-executable
+ * stacks at any rate -- but it lives on in binaries that continue to mark
+ * themselves to explicitly refuse the (never-integrated, now-obviated)
+ * facility. One might cringe that PaX overloads the meaning of the p_flags
+ * to specify protections, but that is the least of its transgressions:
+ * instead of using one p_type constant to explicitly enable a series of
+ * protections and another to explicitly disable others, it insists on
+ * conflating both actions into PT_PAX_FLAGS. The resulting doubling of
+ * constant definitions (two constant definitions for every protection instead
+ * of merely one) assures that the values can't even fit in the eight
+ * PF_MASKOS bits putatively defined to provide a modicum of cleanliness for
+ * such filthy functionality. And were all of this not enough, there is one
+ * final nomenclature insult to be added to this semantic injury: the
+ * constants for the p_flags don't even embed "_PAX_" in their name -- despite
+ * the fact that this is their only purpose! We resist the temptation to
+ * right this final wrong here; we grit our teeth and provide exactly the
+ * Linux definitions -- or rather, what would have been the Linux definitions
+ * had this belching jalopy ever been permitted to crash itself into mainline.
+ */
+#define PF_PAGEEXEC 0x00000010 /* PaX: enable PAGEEXEC */
+#define PF_NOPAGEEXEC 0x00000020 /* PaX: disable PAGEEXEC */
+#define PF_SEGMEXEC 0x00000040 /* PaX: enable SEGMEXEC */
+#define PF_NOSEGMEXEC 0x00000080 /* PaX: disable SEGMEXEC */
+#define PF_MPROTECT 0x00000100 /* PaX: enable MPROTECT */
+#define PF_NOMPROTECT 0x00000200 /* PaX: disable MPROTECT */
+#define PF_RANDEXEC 0x00000400 /* PaX: enable RANDEXEC */
+#define PF_NORANDEXEC 0x00000800 /* PaX: disable RANDEXEC */
+#define PF_EMUTRAMP 0x00001000 /* PaX: enable EMUTRAMP */
+#define PF_NOEMUTRAMP 0x00002000 /* PaX: disable EMUTRAMP */
+#define PF_RANDMMAP 0x00004000 /* PaX: enable RANDMMAP */
+#define PF_NORANDMMAP 0x00008000 /* PaX: disable RANDMMAP */
+
#define PF_MASKOS 0x0ff00000 /* OS specific values */
#define PF_MASKPROC 0xf0000000 /* processor specific values */
diff --git a/usr/src/uts/common/sys/eventfd.h b/usr/src/uts/common/sys/eventfd.h
index 1b0d961b0b..b64a101348 100644
--- a/usr/src/uts/common/sys/eventfd.h
+++ b/usr/src/uts/common/sys/eventfd.h
@@ -10,7 +10,7 @@
*/
/*
- * Copyright (c) 2015 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2017, Joyent, Inc.
*/
/*
@@ -47,6 +47,13 @@ typedef uint64_t eventfd_t;
#define EVENTFDIOC (('e' << 24) | ('f' << 16) | ('d' << 8))
#define EVENTFDIOC_SEMAPHORE (EVENTFDIOC | 1) /* toggle sem state */
+/*
+ * Kernel-internal method to write to eventfd while bypassing overflow limits,
+ * therefore avoiding potential to block as well. This is used to fulfill AIO
+ * behavior in LX related to eventfd notification.
+ */
+#define EVENTFDIOC_POST (EVENTFDIOC | 2)
+
#ifndef _KERNEL
extern int eventfd(unsigned int, int);
@@ -58,6 +65,7 @@ extern int eventfd_write(int, eventfd_t);
#define EVENTFDMNRN_EVENTFD 0
#define EVENTFDMNRN_CLONE 1
#define EVENTFD_VALMAX (ULLONG_MAX - 1ULL)
+#define EVENTFD_VALOVERFLOW ULLONG_MAX
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h
index 23eb5b6bf7..0d5b4c4611 100644
--- a/usr/src/uts/common/sys/exec.h
+++ b/usr/src/uts/common/sys/exec.h
@@ -81,7 +81,8 @@ typedef struct uarg {
ssize_t na;
ssize_t ne;
ssize_t nc;
- ssize_t arglen;
+ size_t argstrlen;
+ size_t envstrlen;
char *fname;
char *pathname;
size_t auxsize;
@@ -107,10 +108,13 @@ typedef struct uarg {
vnode_t *ex_vp;
char *emulator;
char *brandname;
+ const char *brand_nroot;
char *auxp_auxflags; /* addr of auxflags auxv on the user stack */
char *auxp_brand; /* address of first brand auxv on user stack */
cred_t *pfcred;
boolean_t scrubenv;
+ uintptr_t maxstack;
+ boolean_t stk_prot_override;
uintptr_t commpage;
} uarg_t;
@@ -181,7 +185,7 @@ struct execsw {
int (*exec_func)(struct vnode *vp, struct execa *uap,
struct uarg *args, struct intpdata *idata, int level,
size_t *execsz, int setid, caddr_t exec_file,
- struct cred *cred, int brand_action);
+ struct cred *cred, int *brand_action);
int (*exec_core)(struct vnode *vp, struct proc *p,
struct cred *cred, rlim64_t rlimit, int sig,
core_content_t content);
@@ -209,7 +213,7 @@ extern int exec_common(const char *fname, const char **argp,
const char **envp, int brand_action);
extern int gexec(vnode_t **vp, struct execa *uap, struct uarg *args,
struct intpdata *idata, int level, size_t *execsz, caddr_t exec_file,
- struct cred *cred, int brand_action);
+ struct cred *cred, int *brand_action);
extern struct execsw *allocate_execsw(char *name, char *magic,
size_t magic_size);
extern struct execsw *findexecsw(char *magic);
@@ -234,18 +238,20 @@ extern void exec_set_sp(size_t);
* when compiling the 32-bit compatability elf code in the elfexec module.
*/
extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
- size_t *, int, caddr_t, cred_t *, int);
+ size_t *, int, caddr_t, cred_t *, int *);
extern int mapexec_brand(vnode_t *, uarg_t *, Ehdr *, Addr *,
- intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *);
+ intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *,
+ uintptr_t *, uintptr_t *);
extern int elfreadhdr(vnode_t *, cred_t *, Ehdr *, uint_t *, caddr_t *,
size_t *);
#endif /* !_ELF32_COMPAT */
#if defined(_LP64)
extern int elf32exec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
- size_t *, int, caddr_t, cred_t *, int);
+ size_t *, int, caddr_t, cred_t *, int *);
extern int mapexec32_brand(vnode_t *, uarg_t *, Elf32_Ehdr *, Elf32_Addr *,
- intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *);
+ intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *,
+ uintptr_t *, uintptr_t *);
extern int elf32readhdr(vnode_t *, cred_t *, Elf32_Ehdr *, uint_t *, caddr_t *,
size_t *);
#endif /* _LP64 */
diff --git a/usr/src/uts/common/sys/file.h b/usr/src/uts/common/sys/file.h
index d300b940e2..66620ab7b9 100644
--- a/usr/src/uts/common/sys/file.h
+++ b/usr/src/uts/common/sys/file.h
@@ -28,6 +28,7 @@
/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */
/* Copyright 2020 Joyent, Inc. */
+/* Copyright 2021 OmniOS Community Edition (OmniOSce) Association. */
#ifndef _SYS_FILE_H
#define _SYS_FILE_H
@@ -120,6 +121,15 @@ typedef struct fpollinfo {
#define FCLOEXEC 0x800000 /* O_CLOEXEC = 0x800000 */
#define FDIRECTORY 0x1000000 /* O_DIRECTORY = 0x1000000 */
#define FDIRECT 0x2000000 /* O_DIRECT = 0x2000000 */
+/*
+ * Private interface for lx O_PATH|O_NOFOLLOW emulation for symlinks.
+ */
+#define __FLXPATH 0x80000000
+/*
+ * Private interface for lx fstatat(AT_NO_AUTOMOUNT) emulation.
+ * Since usage is disjoint, the __FLXPATH bit is re-used.
+ */
+#define __FLXNOAUTO 0x80000000
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
@@ -224,6 +234,7 @@ extern void fcnt_add(struct uf_info *, int);
extern void close_exec(struct uf_info *);
extern void clear_stale_fd(void);
extern void clear_active_fd(int);
+extern void set_active_fd(int);
extern void free_afd(afd_t *afd);
extern int fgetstartvp(int, char *, struct vnode **);
extern int fsetattrat(int, char *, int, struct vattr *);
diff --git a/usr/src/uts/common/sys/frameio.h b/usr/src/uts/common/sys/frameio.h
new file mode 100644
index 0000000000..54e6dbeedf
--- /dev/null
+++ b/usr/src/uts/common/sys/frameio.h
@@ -0,0 +1,107 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FRAMEIO_H
+#define _SYS_FRAMEIO_H
+
+/*
+ * Frame I/O definitions
+ */
+
+#include <sys/types.h>
+
+#ifdef _KERNEL
+/* Kernel only headers */
+#include <sys/stream.h>
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * An individual frame vector component. Collections of these are used to make
+ * ioctls.
+ */
+typedef struct framevec {
+ void *fv_buf; /* Buffer with data */
+ size_t fv_buflen; /* Size of the buffer */
+ size_t fv_actlen; /* Amount of buffer consumed, ignore on error */
+} framevec_t;
+
+/*
+ * The base unit used with frameio.
+ */
+typedef struct frameio {
+ uint_t fio_version; /* Should always be FRAMEIO_CURRENT_VERSION */
+ uint_t fio_nvpf; /* How many vectors make up one frame */
+ uint_t fio_nvecs; /* The total number of vectors */
+ framevec_t fio_vecs[]; /* C99 VLA */
+} frameio_t;
+
+
+#define FRAMEIO_VERSION_ONE 1
+#define FRAMEIO_CURRENT_VERSION FRAMEIO_VERSION_ONE
+
+#define FRAMEIO_NVECS_MAX 32
+
+/*
+ * Definitions for kernel modules to include as helpers. These are consolidation
+ * private.
+ */
+#ifdef _KERNEL
+
+/*
+ * 32-bit versions for 64-bit kernels
+ */
+typedef struct framevec32 {
+ caddr32_t fv_buf;
+ size32_t fv_buflen;
+ size32_t fv_actlen;
+} framevec32_t;
+
+typedef struct frameio32 {
+ uint_t fio_version;
+ uint_t fio_vecspframe;
+ uint_t fio_nvecs;
+ framevec32_t fio_vecs[];
+} frameio32_t;
+
+/*
+ * Describe the different ways that vectors should map to frames.
+ */
+typedef enum frameio_write_mblk_map {
+ MAP_BLK_FRAME
+} frameio_write_mblk_map_t;
+
+int frameio_init(void);
+void frameio_fini(void);
+frameio_t *frameio_alloc(int);
+void frameio_free(frameio_t *);
+int frameio_hdr_copyin(frameio_t *, int, const void *, uint_t);
+int frameio_mblk_chain_read(frameio_t *, mblk_t **, int *, int);
+int frameio_mblk_chain_write(frameio_t *, frameio_write_mblk_map_t, mblk_t *,
+ int *, int);
+int frameio_hdr_copyout(frameio_t *, int, void *, uint_t);
+size_t frameio_frame_length(frameio_t *, framevec_t *);
+void frameio_mark_consumed(frameio_t *, int);
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FRAMEIO_H */
diff --git a/usr/src/uts/common/sys/fs/fifonode.h b/usr/src/uts/common/sys/fs/fifonode.h
index d8b158ce3c..1ea8563e1c 100644
--- a/usr/src/uts/common/sys/fs/fifonode.h
+++ b/usr/src/uts/common/sys/fs/fifonode.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2017 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -83,6 +84,7 @@ struct fifonode {
struct msgb *fn_tail; /* last message to read */
fifolock_t *fn_lock; /* pointer to per fifo lock */
uint_t fn_count; /* Number of bytes on fn_mp */
+ uint_t fn_hiwat; /* pipe (fifofast) high water */
kcondvar_t fn_wait_cv; /* fifo conditional variable */
ushort_t fn_wcnt; /* number of writers */
ushort_t fn_rcnt; /* number of readers */
@@ -135,6 +137,8 @@ typedef struct fifodata {
#define FIFOPOLLRBAND 0x20000
#define FIFOSTAYFAST 0x40000 /* don't turn into stream mode */
#define FIFOWAITMODE 0x80000 /* waiting for the possibility to change mode */
+/* Data on loan, block reads. Use in conjunction with FIFOSTAYFAST. */
+#define FIFORDBLOCK 0x100000
#define FIFOHIWAT (16 * 1024)
#define FIFOLOWAT (0)
@@ -147,16 +151,6 @@ typedef struct fifodata {
#if defined(_KERNEL)
-/*
- * Fifohiwat defined as a variable is to allow tuning of the high
- * water mark if needed. It is not meant to be released.
- */
-#if FIFODEBUG
-extern int Fifohiwat;
-#else /* FIFODEBUG */
-#define Fifohiwat FIFOHIWAT
-#endif /* FIFODEBUG */
-
extern struct vnodeops *fifo_vnodeops;
extern const struct fs_operation_def fifo_vnodeops_template[];
extern struct kmem_cache *fnode_cache;
@@ -181,6 +175,8 @@ extern void fifo_fastoff(fifonode_t *);
extern struct streamtab *fifo_getinfo();
extern void fifo_wakereader(fifonode_t *, fifolock_t *);
extern void fifo_wakewriter(fifonode_t *, fifolock_t *);
+extern boolean_t fifo_stayfast_enter(fifonode_t *);
+extern void fifo_stayfast_exit(fifonode_t *);
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/sys/fs/hyprlofs.h b/usr/src/uts/common/sys/fs/hyprlofs.h
new file mode 100644
index 0000000000..b8c4149df2
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/hyprlofs.h
@@ -0,0 +1,91 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_HYPRLOFS_H
+#define _SYS_FS_HYPRLOFS_H
+
+#include <sys/param.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * hyprlofs ioctl numbers.
+ */
+#define HYPRLOFS_IOC ('H' << 8)
+
+#define HYPRLOFS_ADD_ENTRIES (HYPRLOFS_IOC | 1)
+#define HYPRLOFS_RM_ENTRIES (HYPRLOFS_IOC | 2)
+#define HYPRLOFS_RM_ALL (HYPRLOFS_IOC | 3)
+#define HYPRLOFS_GET_ENTRIES (HYPRLOFS_IOC | 4)
+
+typedef struct {
+ char *hle_path;
+ uint_t hle_plen;
+ char *hle_name;
+ uint_t hle_nlen;
+} hyprlofs_entry_t;
+
+typedef struct {
+ hyprlofs_entry_t *hle_entries;
+ uint_t hle_len;
+} hyprlofs_entries_t;
+
+typedef struct {
+ char hce_path[MAXPATHLEN];
+ char hce_name[MAXPATHLEN];
+} hyprlofs_curr_entry_t;
+
+typedef struct {
+ hyprlofs_curr_entry_t *hce_entries;
+ uint_t hce_cnt;
+} hyprlofs_curr_entries_t;
+
+#ifdef _KERNEL
+typedef struct {
+ caddr32_t hle_path;
+ uint_t hle_plen;
+ caddr32_t hle_name;
+ uint_t hle_nlen;
+} hyprlofs_entry32_t;
+
+typedef struct {
+ caddr32_t hle_entries;
+ uint_t hle_len;
+} hyprlofs_entries32_t;
+
+typedef struct {
+ caddr32_t hce_entries;
+ uint_t hce_cnt;
+} hyprlofs_curr_entries32_t;
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_HYPRLOFS_H */
diff --git a/usr/src/uts/common/sys/fs/hyprlofs_info.h b/usr/src/uts/common/sys/fs/hyprlofs_info.h
new file mode 100644
index 0000000000..38389f77d9
--- /dev/null
+++ b/usr/src/uts/common/sys/fs/hyprlofs_info.h
@@ -0,0 +1,174 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2012, Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_FS_HYPRLOFS_INFO_H
+#define _SYS_FS_HYPRLOFS_INFO_H
+
+#include <sys/t_lock.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <sys/vfs_opreg.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * hlnode is the file system dependent node for hyprlofs.
+ * It is modeled on the tmpfs tmpnode.
+ *
+ * hln_rwlock protects access of the directory list at hln_dir
+ * as well as syncronizing read/writes to directory hlnodes.
+ * hln_tlock protects updates to hln_mode and hln_nlink.
+ * hln_tlock doesn't require any hlnode locks.
+ */
+typedef struct hlnode {
+ struct hlnode *hln_back; /* linked list of hlnodes */
+ struct hlnode *hln_forw; /* linked list of hlnodes */
+ union {
+ struct {
+ struct hldirent *un_dirlist; /* dirent list */
+ uint_t un_dirents; /* number of dirents */
+ } un_dirstruct;
+ vnode_t *un_realvp; /* real vnode */
+ } un_hlnode;
+ vnode_t *hln_vnode; /* vnode for this hlnode */
+ int hln_gen; /* pseudo gen num for hlfid */
+ int hln_looped; /* flag indicating loopback */
+ vattr_t hln_attr; /* attributes */
+ krwlock_t hln_rwlock; /* rw - serialize mods and */
+ /* directory updates */
+ kmutex_t hln_tlock; /* time, flag, and nlink lock */
+} hlnode_t;
+
+/*
+ * hyprlofs per-mount data structure.
+ * All fields are protected by hlm_contents.
+ */
+typedef struct {
+ vfs_t *hlm_vfsp; /* filesystem's vfs struct */
+ hlnode_t *hlm_rootnode; /* root hlnode */
+ char *hlm_mntpath; /* name of hyprlofs mount point */
+ dev_t hlm_dev; /* unique dev # of mounted `device' */
+ uint_t hlm_gen; /* pseudo generation number for files */
+ kmutex_t hlm_contents; /* lock for hlfsmount structure */
+} hlfsmount_t;
+
+/*
+ * hyprlofs directories are made up of a linked list of hldirent structures
+ * hanging off directory hlnodes. File names are not fixed length,
+ * but are null terminated.
+ */
+typedef struct hldirent {
+ hlnode_t *hld_hlnode; /* hlnode for this file */
+ struct hldirent *hld_next; /* next directory entry */
+ struct hldirent *hld_prev; /* prev directory entry */
+ uint_t hld_offset; /* "offset" of dir entry */
+ uint_t hld_hash; /* a hash of td_name */
+ struct hldirent *hld_link; /* linked via the hash table */
+ hlnode_t *hld_parent; /* parent, dir we are in */
+ char *hld_name; /* must be null terminated */
+ /* max length is MAXNAMELEN */
+} hldirent_t;
+
+/*
+ * hlfid overlays the fid structure (for VFS_VGET)
+ */
+typedef struct {
+ uint16_t hlfid_len;
+ ino32_t hlfid_ino;
+ int32_t hlfid_gen;
+} hlfid_t;
+
+/*
+ * File system independent to hyprlofs conversion macros
+ */
+#define VFSTOHLM(vfsp) ((hlfsmount_t *)(vfsp)->vfs_data)
+#define VTOHLM(vp) ((hlfsmount_t *)(vp)->v_vfsp->vfs_data)
+#define VTOHLN(vp) ((hlnode_t *)(vp)->v_data)
+#define HLNTOV(tp) ((tp)->hln_vnode)
+#define REALVP(vp) ((vnode_t *)VTOHLN(vp)->hln_realvp)
+#define hlnode_hold(tp) VN_HOLD(HLNTOV(tp))
+#define hlnode_rele(tp) VN_RELE(HLNTOV(tp))
+
+#define hln_dir un_hlnode.un_dirstruct.un_dirlist
+#define hln_dirents un_hlnode.un_dirstruct.un_dirents
+#define hln_realvp un_hlnode.un_realvp
+
+/*
+ * Attributes
+ */
+#define hln_mask hln_attr.va_mask
+#define hln_type hln_attr.va_type
+#define hln_mode hln_attr.va_mode
+#define hln_uid hln_attr.va_uid
+#define hln_gid hln_attr.va_gid
+#define hln_fsid hln_attr.va_fsid
+#define hln_nodeid hln_attr.va_nodeid
+#define hln_nlink hln_attr.va_nlink
+#define hln_size hln_attr.va_size
+#define hln_atime hln_attr.va_atime
+#define hln_mtime hln_attr.va_mtime
+#define hln_ctime hln_attr.va_ctime
+#define hln_rdev hln_attr.va_rdev
+#define hln_blksize hln_attr.va_blksize
+#define hln_nblocks hln_attr.va_nblocks
+#define hln_seq hln_attr.va_seq
+
+/*
+ * enums
+ */
+enum de_op { DE_CREATE, DE_MKDIR }; /* direnter ops */
+enum dr_op { DR_REMOVE, DR_RMDIR }; /* dirremove ops */
+
+/*
+ * hyprlofs_minfree is the amount (in pages) of anonymous memory that hyprlofs
+ * leaves free for the rest of the system. The default value for
+ * hyprlofs_minfree is btopr(HYPRLOFSMINFREE) but it can be patched to a
+ * different number of pages. Since hyprlofs doesn't actually use much
+ * memory, its unlikely this ever needs to be patched.
+ */
+#define HYPRLOFSMINFREE 8 * 1024 * 1024 /* 8 Megabytes */
+
+extern size_t hyprlofs_minfree; /* Anonymous memory in pages */
+
+extern void hyprlofs_node_init(hlfsmount_t *, hlnode_t *, vattr_t *,
+ cred_t *);
+extern int hyprlofs_dirlookup(hlnode_t *, char *, hlnode_t **, cred_t *);
+extern int hyprlofs_dirdelete(hlnode_t *, hlnode_t *, char *, enum dr_op,
+ cred_t *);
+extern void hyprlofs_dirinit(hlnode_t *, hlnode_t *);
+extern void hyprlofs_dirtrunc(hlnode_t *);
+extern int hyprlofs_taccess(void *, int, cred_t *);
+extern int hyprlofs_direnter(hlfsmount_t *, hlnode_t *, char *, enum de_op,
+ vnode_t *, vattr_t *, hlnode_t **, cred_t *);
+
+extern struct vnodeops *hyprlofs_vnodeops;
+extern const struct fs_operation_def hyprlofs_vnodeops_template[];
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FS_HYPRLOFS_INFO_H */
diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h
index dc6601bb43..676193fcfa 100644
--- a/usr/src/uts/common/sys/fs/sdev_impl.h
+++ b/usr/src/uts/common/sys/fs/sdev_impl.h
@@ -37,6 +37,7 @@ extern "C" {
#include <sys/vfs_opreg.h>
#include <sys/list.h>
#include <sys/nvpair.h>
+#include <sys/fs/sdev_plugin.h>
#include <sys/sunddi.h>
#include <sys/fs/sdev_plugin.h>
diff --git a/usr/src/uts/common/sys/fs/tmp.h b/usr/src/uts/common/sys/fs/tmp.h
index fb07de6588..f4cee09244 100644
--- a/usr/src/uts/common/sys/fs/tmp.h
+++ b/usr/src/uts/common/sys/fs/tmp.h
@@ -23,7 +23,7 @@
* All rights reserved. Use is subject to license terms.
*/
/*
- * Copyright 2015 Joyent, Inc.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _SYS_FS_TMP_H
@@ -43,8 +43,10 @@ struct tmount {
struct vfs *tm_vfsp; /* filesystem's vfs struct */
struct tmpnode *tm_rootnode; /* root tmpnode */
char *tm_mntpath; /* name of tmpfs mount point */
- ulong_t tm_anonmax; /* file system max anon reservation */
- pgcnt_t tm_anonmem; /* pages of reserved anon memory */
+ size_t tm_anonmax; /* file system max anon reservation */
+ size_t tm_anonmem; /* bytes of reserved anon memory */
+ /* and allocated kmem for the fs */
+ size_t tm_allocmem; /* bytes alloced from tmp_kmem_ funcs */
dev_t tm_dev; /* unique dev # of mounted `device' */
uint_t tm_gen; /* pseudo generation number for files */
kmutex_t tm_contents; /* lock for tmount structure */
@@ -58,6 +60,7 @@ struct tmount {
#define VTOTM(vp) ((struct tmount *)(vp)->v_vfsp->vfs_data)
#define VTOTN(vp) ((struct tmpnode *)(vp)->v_data)
#define TNTOV(tp) ((tp)->tn_vnode)
+#define TNTOTM(tp) (VTOTM(TNTOV(tp)))
#define tmpnode_hold(tp) VN_HOLD(TNTOV(tp))
#define tmpnode_rele(tp) VN_RELE(TNTOV(tp))
@@ -69,41 +72,39 @@ enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; /* dirremove ops */
/*
* tmpfs_minfree is the amount (in pages) of anonymous memory that tmpfs
- * leaves free for the rest of the system. E.g. in a system with 32MB of
- * configured swap space, if 16MB were reserved (leaving 16MB free),
- * tmpfs could allocate up to 16MB - tmpfs_minfree. The default value
- * for tmpfs_minfree is btopr(TMPMINFREE) but it can cautiously patched
- * to a different number of pages.
- * NB: If tmpfs allocates too much swap space, other processes will be
- * unable to execute.
+ * leaves free for the rest of the system. In antiquity, this number could be
+ * relevant on a system-wide basis, as physical DRAM was routinely exhausted;
+ * however, in more modern times, the relative growth of DRAM with respect to
+ * application footprint means that this number is only likely to become
+ * factor in a virtualized OS environment (e.g., a zone) -- and even then only
+ * when DRAM and swap have both been capped low to allow for maximum tenancy.
+ * TMPMINFREE -- the value from which tmpfs_minfree is derived -- should
+ * therefore be configured to a value that is roughly the smallest practical
+ * value for memory + swap minus the largest reasonable size for tmpfs in such
+ * a configuration. As of this writing, the smallest practical memory + swap
+ * configuration is 128MB, and it seems reasonable to allow tmpfs to consume
+ * no more than seven-eighths of this, yielding a TMPMINFREE of 16MB. Care
+ * should be exercised in changing this: tuning this value too high will
+ * result in spurious ENOSPC errors in tmpfs in small zones (a problem that
+ * can induce cascading failure surprisingly often); tuning this value too low
+ * will result in tmpfs consumption alone to alone induce application-level
+ * memory allocation failure.
*/
-#define TMPMINFREE 2 * 1024 * 1024 /* 2 Megabytes */
+#define TMPMINFREE 16 * 1024 * 1024 /* 16 Megabytes */
extern size_t tmpfs_minfree; /* Anonymous memory in pages */
-/*
- * tmpfs can allocate only a certain percentage of kernel memory,
- * which is used for tmpnodes, directories, file names, etc.
- * This is statically set as TMPMAXFRACKMEM of physical memory.
- * The actual number of allocatable bytes can be patched in tmpfs_maxkmem.
- */
-#define TMPMAXFRACKMEM 25 /* 1/25 of physical memory */
-
-extern size_t tmp_kmemspace;
-extern size_t tmpfs_maxkmem; /* Allocatable kernel memory in bytes */
-
extern void tmpnode_init(struct tmount *, struct tmpnode *,
struct vattr *, struct cred *);
+extern void tmpnode_cleanup(struct tmpnode *tp);
extern int tmpnode_trunc(struct tmount *, struct tmpnode *, ulong_t);
extern void tmpnode_growmap(struct tmpnode *, ulong_t);
extern int tdirlookup(struct tmpnode *, char *, struct tmpnode **,
struct cred *);
extern int tdirdelete(struct tmpnode *, struct tmpnode *, char *,
enum dr_op, struct cred *);
-extern void tdirinit(struct tmpnode *, struct tmpnode *);
+extern int tdirinit(struct tmpnode *, struct tmpnode *);
extern void tdirtrunc(struct tmpnode *);
-extern void *tmp_memalloc(size_t, int);
-extern void tmp_memfree(void *, size_t);
extern int tmp_resv(struct tmount *, struct tmpnode *, size_t, int);
extern int tmp_taccess(void *, int, struct cred *);
extern int tmp_sticky_remove_access(struct tmpnode *, struct tmpnode *,
@@ -114,6 +115,9 @@ extern int tdirenter(struct tmount *, struct tmpnode *, char *,
enum de_op, struct tmpnode *, struct tmpnode *, struct vattr *,
struct tmpnode **, struct cred *, caller_context_t *);
+extern void *tmp_kmem_zalloc(struct tmount *, size_t, int);
+extern void tmp_kmem_free(struct tmount *, void *, size_t);
+
#define TMP_MUSTHAVE 0x01
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/fss.h b/usr/src/uts/common/sys/fss.h
index 6168e9d9ed..87d798d6c1 100644
--- a/usr/src/uts/common/sys/fss.h
+++ b/usr/src/uts/common/sys/fss.h
@@ -160,6 +160,7 @@ typedef struct fsszone {
/*
* fss_flags
*/
+/* Formerly: FSSKPRI 0x01 - the thread is in kernel mode */
#define FSSBACKQ 0x02 /* thread should be placed at the back of */
/* the dispatch queue if preempted */
#define FSSRESTORE 0x04 /* thread was not preempted, due to schedctl */
diff --git a/usr/src/uts/common/sys/fx.h b/usr/src/uts/common/sys/fx.h
index 2d4e1aa7fb..4a48af52a1 100644
--- a/usr/src/uts/common/sys/fx.h
+++ b/usr/src/uts/common/sys/fx.h
@@ -21,13 +21,12 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_FX_H
#define _SYS_FX_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/types.h>
#include <sys/thread.h>
#include <sys/ddi.h>
@@ -145,7 +144,14 @@ typedef struct fxkparms {
uint_t fx_cflags;
} fxkparms_t;
+/*
+ * control flags (kparms->fx_cflags).
+ */
+#define FX_DOUPRILIM 0x01 /* change user priority limit */
+#define FX_DOUPRI 0x02 /* change user priority */
+#define FX_DOTQ 0x04 /* change FX time quantum */
+#define FXMAXUPRI 60 /* maximum user priority setting */
/*
* Interface for partner private code. This is not a public interface.
diff --git a/usr/src/uts/common/sys/gsqueue.h b/usr/src/uts/common/sys/gsqueue.h
new file mode 100644
index 0000000000..91ab46fc44
--- /dev/null
+++ b/usr/src/uts/common/sys/gsqueue.h
@@ -0,0 +1,59 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2017 Joyent, Inc.
+ */
+
+#ifndef _SYS_GSQUEUE_H
+#define _SYS_GSQUEUE_H
+
+/*
+ * Standard interfaces to serializaion queues for everyone (except IP).
+ */
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+typedef struct gsqueue gsqueue_t;
+typedef struct gsqueue_set gsqueue_set_t;
+
+typedef void (*gsqueue_cb_f)(gsqueue_set_t *, gsqueue_t *, void *, boolean_t);
+typedef void (*gsqueue_proc_f)(void *, mblk_t *, gsqueue_t *, void *);
+
+extern gsqueue_set_t *gsqueue_set_create(pri_t);
+extern void gsqueue_set_destroy(gsqueue_set_t *);
+extern gsqueue_t *gsqueue_set_get(gsqueue_set_t *, uint_t);
+
+extern uintptr_t gsqueue_set_cb_add(gsqueue_set_t *, gsqueue_cb_f, void *);
+extern int gsqueue_set_cb_remove(gsqueue_set_t *, uintptr_t);
+
+#define GSQUEUE_FILL 0x0001
+#define GSQUEUE_NODRAIN 0x0002
+#define GSQUEUE_PROCESS 0x0004
+
+extern void gsqueue_enter_one(gsqueue_t *, mblk_t *, gsqueue_proc_f, void *,
+ int, uint8_t);
+
+#define GSQUEUE_DEFAULT_PRIORITY MAXCLSYSPRI
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_GSQUEUE_H */
diff --git a/usr/src/uts/common/sys/ia.h b/usr/src/uts/common/sys/ia.h
index 02dc29aaec..567c121bb0 100644
--- a/usr/src/uts/common/sys/ia.h
+++ b/usr/src/uts/common/sys/ia.h
@@ -86,6 +86,7 @@ typedef struct iaproc {
/* flags */
+/* Formerly: IAKPRI 0x01 - thread at kernel model priority */
#define IABACKQ 0x02 /* thread goes to back of disp q when preempted */
#define IASLEPT 0x04 /* thread had long-term suspend - give new slice */
diff --git a/usr/src/uts/common/sys/inotify.h b/usr/src/uts/common/sys/inotify.h
new file mode 100644
index 0000000000..8acc1a7280
--- /dev/null
+++ b/usr/src/uts/common/sys/inotify.h
@@ -0,0 +1,153 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Header file to support for the inotify facility. Note that this facility
+ * is designed to be binary compatible with the Linux inotify facility; values
+ * for constants here should therefore exactly match those found in Linux, and
+ * this facility shouldn't be extended independently of Linux.
+ */
+
+#ifndef _SYS_INOTIFY_H
+#define _SYS_INOTIFY_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Events that can be explicitly requested on any inotify watch.
+ */
+#define IN_ACCESS 0x00000001
+#define IN_MODIFY 0x00000002
+#define IN_ATTRIB 0x00000004
+#define IN_CLOSE_WRITE 0x00000008
+#define IN_CLOSE_NOWRITE 0x00000010
+#define IN_OPEN 0x00000020
+#define IN_MOVED_FROM 0x00000040
+#define IN_MOVED_TO 0x00000080
+#define IN_CREATE 0x00000100
+#define IN_DELETE 0x00000200
+#define IN_DELETE_SELF 0x00000400
+#define IN_MOVE_SELF 0x00000800
+
+/*
+ * Events that can be sent to an inotify watch -- requested or not.
+ */
+#define IN_UNMOUNT 0x00002000
+#define IN_Q_OVERFLOW 0x00004000
+#define IN_IGNORED 0x00008000
+
+/*
+ * Flags that can modify an inotify event.
+ */
+#define IN_ONLYDIR 0x01000000
+#define IN_DONT_FOLLOW 0x02000000
+#define IN_EXCL_UNLINK 0x04000000
+#define IN_MASK_ADD 0x20000000
+#define IN_ISDIR 0x40000000
+#define IN_ONESHOT 0x80000000
+
+/*
+ * Helpful constants.
+ */
+#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE)
+#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO)
+#define IN_ALL_EVENTS \
+ (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
+ IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | IN_MOVED_TO | \
+ IN_DELETE | IN_CREATE | IN_DELETE_SELF | IN_MOVE_SELF)
+
+#define IN_CHILD_EVENTS \
+ (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
+ IN_CLOSE_NOWRITE | IN_MODIFY | IN_OPEN)
+
+/*
+ * To assure binary compatibility with Linux, these values are fixed at their
+ * Linux equivalents, not their native ones.
+ */
+#define IN_CLOEXEC 02000000 /* LX_O_CLOEXEC */
+#define IN_NONBLOCK 04000 /* LX_O_NONBLOCK */
+
+struct inotify_event {
+ int32_t wd; /* watch descriptor */
+ uint32_t mask; /* mask of events */
+ uint32_t cookie; /* event association cookie, if any */
+ uint32_t len; /* size of name field */
+ char name[]; /* optional NUL-terminated name */
+};
+
+/*
+ * These ioctl values are specific to the native implementation; applications
+ * shouldn't be using them directly, and they should therefore be safe to
+ * change without breaking apps.
+ */
+#define INOTIFYIOC (('i' << 24) | ('n' << 16) | ('y' << 8))
+#define INOTIFYIOC_ADD_WATCH (INOTIFYIOC | 1) /* add watch */
+#define INOTIFYIOC_RM_WATCH (INOTIFYIOC | 2) /* remove watch */
+#define INOTIFYIOC_ADD_CHILD (INOTIFYIOC | 3) /* add child watch */
+#define INOTIFYIOC_ACTIVATE (INOTIFYIOC | 4) /* activate watch */
+
+#ifndef _LP64
+#ifndef _LITTLE_ENDIAN
+#define INOTIFY_PTR(type, name) uint32_t name##pad; type *name
+#else
+#define INOTIFY_PTR(type, name) type *name; uint32_t name##pad
+#endif
+#else
+#define INOTIFY_PTR(type, name) type *name
+#endif
+
+typedef struct inotify_addwatch {
+ int inaw_fd; /* open fd for object */
+ uint32_t inaw_mask; /* desired mask */
+} inotify_addwatch_t;
+
+typedef struct inotify_addchild {
+ INOTIFY_PTR(char, inac_name); /* pointer to name */
+ int inac_fd; /* open fd for parent */
+} inotify_addchild_t;
+
+#ifndef _KERNEL
+
+extern int inotify_init(void);
+extern int inotify_init1(int);
+extern int inotify_add_watch(int, const char *, uint32_t);
+extern int inotify_rm_watch(int, int);
+
+#else
+
+#define IN_UNMASKABLE \
+ (IN_UNMOUNT | IN_Q_OVERFLOW | IN_IGNORED | IN_ISDIR)
+
+#define IN_MODIFIERS \
+ (IN_EXCL_UNLINK | IN_ONESHOT)
+
+#define IN_FLAGS \
+ (IN_ONLYDIR | IN_DONT_FOLLOW | IN_MASK_ADD)
+
+#define IN_REMOVAL (1ULL << 32)
+#define INOTIFYMNRN_INOTIFY 0
+#define INOTIFYMNRN_CLONE 1
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_INOTIFY_H */
diff --git a/usr/src/uts/common/sys/ipc_impl.h b/usr/src/uts/common/sys/ipc_impl.h
index 0569c3e967..d7dc365c09 100644
--- a/usr/src/uts/common/sys/ipc_impl.h
+++ b/usr/src/uts/common/sys/ipc_impl.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
#ifndef _IPC_IMPL_H
@@ -226,6 +227,7 @@ int ipc_commit_begin(ipc_service_t *, key_t, int, kipc_perm_t *);
kmutex_t *ipc_commit_end(ipc_service_t *, kipc_perm_t *);
void ipc_cleanup(ipc_service_t *, kipc_perm_t *);
+void ipc_rmsvc(ipc_service_t *, kipc_perm_t *);
int ipc_rmid(ipc_service_t *, int, cred_t *);
int ipc_ids(ipc_service_t *, int *, uint_t, uint_t *);
diff --git a/usr/src/uts/common/sys/ipd.h b/usr/src/uts/common/sys/ipd.h
index bad74f8b81..f21c3fb5af 100644
--- a/usr/src/uts/common/sys/ipd.h
+++ b/usr/src/uts/common/sys/ipd.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc. All rights reserved.
*/
/*
@@ -35,7 +35,7 @@ extern "C" {
#endif
#define IPD_DEV_PATH "/dev/ipd"
-#define IPD_MAX_DELAY 10000 /* 10 ms in us */
+#define IPD_MAX_DELAY 1000000 /* 1 second in microseconds */
typedef struct ipd_ioc_perturb {
zoneid_t ipip_zoneid;
diff --git a/usr/src/uts/common/sys/iso/signal_iso.h b/usr/src/uts/common/sys/iso/signal_iso.h
index bf89ef0d33..0a76ee19a7 100644
--- a/usr/src/uts/common/sys/iso/signal_iso.h
+++ b/usr/src/uts/common/sys/iso/signal_iso.h
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -95,7 +96,7 @@ extern "C" {
/* insert new signals here, and move _SIGRTM* appropriately */
#define _SIGRTMIN 42 /* first (highest-priority) realtime signal */
-#define _SIGRTMAX 73 /* last (lowest-priority) realtime signal */
+#define _SIGRTMAX 74 /* last (lowest-priority) realtime signal */
extern long _sysconf(int); /* System Private interface to sysconf() */
#define SIGRTMIN ((int)_sysconf(_SC_SIGRT_MIN)) /* first realtime signal */
#define SIGRTMAX ((int)_sysconf(_SC_SIGRT_MAX)) /* last realtime signal */
diff --git a/usr/src/uts/common/sys/klwp.h b/usr/src/uts/common/sys/klwp.h
index 41b70f6a6e..0ea1a396b9 100644
--- a/usr/src/uts/common/sys/klwp.h
+++ b/usr/src/uts/common/sys/klwp.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _SYS_KLWP_H
@@ -191,7 +191,14 @@ typedef struct _klwp {
struct ct_template *lwp_ct_active[CTT_MAXTYPE]; /* active templates */
struct contract *lwp_ct_latest[CTT_MAXTYPE]; /* last created contract */
- void *lwp_brand; /* per-lwp brand data */
+ /*
+ * Branding:
+ * lwp_brand - per-lwp brand data
+ * lwp_brand_syscall - brand syscall interposer
+ */
+ void *lwp_brand;
+ int (*lwp_brand_syscall)(void);
+
struct psinfo *lwp_spymaster; /* if an agent LWP, our spymaster */
} klwp_t;
diff --git a/usr/src/uts/common/sys/mac_client.h b/usr/src/uts/common/sys/mac_client.h
index 1d1915a816..8fff314bfe 100644
--- a/usr/src/uts/common/sys/mac_client.h
+++ b/usr/src/uts/common/sys/mac_client.h
@@ -116,6 +116,7 @@ typedef enum {
#define MAC_PROMISC_FLAGS_NO_PHYS 0x0002
#define MAC_PROMISC_FLAGS_VLAN_TAG_STRIP 0x0004
#define MAC_PROMISC_FLAGS_NO_COPY 0x0008
+#define MAC_PROMISC_FLAGS_DO_FIXUPS 0x0010
/* flags passed to mac_tx() */
#define MAC_DROP_ON_NO_DESC 0x01 /* freemsg() if no tx descs */
diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h
index 0e3a6306e0..0f8be50fde 100644
--- a/usr/src/uts/common/sys/mac_client_impl.h
+++ b/usr/src/uts/common/sys/mac_client_impl.h
@@ -83,6 +83,7 @@ typedef struct mac_promisc_impl_s { /* Protected by */
boolean_t mpi_no_phys; /* WO */
boolean_t mpi_strip_vlan_tag; /* WO */
boolean_t mpi_no_copy; /* WO */
+ boolean_t mpi_do_fixups; /* WO */
} mac_promisc_impl_t;
typedef union mac_tx_percpu_s {
diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h
index 01cb27644c..97b3fd685a 100644
--- a/usr/src/uts/common/sys/mac_client_priv.h
+++ b/usr/src/uts/common/sys/mac_client_priv.h
@@ -58,6 +58,9 @@ extern const mac_info_t *mac_info(mac_handle_t);
extern boolean_t mac_info_get(const char *, mac_info_t *);
extern boolean_t mac_promisc_get(mac_handle_t);
+extern boolean_t mac_protect_check_addr(mac_client_handle_t, boolean_t,
+ in6_addr_t *);
+
extern int mac_start(mac_handle_t);
extern void mac_stop(mac_handle_t);
diff --git a/usr/src/uts/common/sys/mac_flow.h b/usr/src/uts/common/sys/mac_flow.h
index 04aa8be3f3..a9a2a5f61e 100644
--- a/usr/src/uts/common/sys/mac_flow.h
+++ b/usr/src/uts/common/sys/mac_flow.h
@@ -22,7 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2013 Joyent, Inc. All rights reserved.
+ * Copyright 2017 Joyent, Inc. All rights reserved.
* Copyright 2020 RackTop Systems, Inc.
*/
@@ -156,6 +156,14 @@ typedef enum {
#define MPT_MAXIPADDR MPT_MAXCNT
#define MPT_MAXCID MPT_MAXCNT
#define MPT_MAXCIDLEN 256
+#define MPT_FALSE 0x00000000
+#define MPT_TRUE 0x00000001
+
+/* Dynamic address detection types */
+#define MPT_DYN_DHCPV4 0x00000001
+#define MPT_DYN_DHCPV6 0x00000002
+#define MPT_DYN_SLAAC 0x00000004
+#define MPT_DYN_ALL 0x00000007
typedef struct mac_ipaddr_s {
uint32_t ip_version;
@@ -176,11 +184,13 @@ typedef struct mac_dhcpcid_s {
} mac_dhcpcid_t;
typedef struct mac_protect_s {
- uint32_t mp_types;
- uint32_t mp_ipaddrcnt;
- mac_ipaddr_t mp_ipaddrs[MPT_MAXIPADDR];
- uint32_t mp_cidcnt;
- mac_dhcpcid_t mp_cids[MPT_MAXCID];
+ uint32_t mp_types; /* Enabled protection types */
+ uint32_t mp_ipaddrcnt; /* Count of allowed IPs */
+ mac_ipaddr_t mp_ipaddrs[MPT_MAXIPADDR]; /* Allowed IPs */
+ uint32_t mp_cidcnt; /* Count of allowed DHCP CIDs */
+ mac_dhcpcid_t mp_cids[MPT_MAXCID]; /* Allowed DHCP CIDs */
+ uint32_t mp_allcids; /* Whether to allow all CIDs through */
+ uint32_t mp_dynamic; /* Enabled dynamic address methods */
} mac_protect_t;
/* The default priority for links */
diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h
index 11fa46e571..6906cb3dbf 100644
--- a/usr/src/uts/common/sys/mman.h
+++ b/usr/src/uts/common/sys/mman.h
@@ -337,6 +337,7 @@ struct memcntl_mha32 {
#define MS_SYNC 0x4 /* wait for msync */
#define MS_ASYNC 0x1 /* return immediately */
#define MS_INVALIDATE 0x2 /* invalidate caches */
+#define MS_INVALCURPROC 0x8 /* invalidate cache for curproc only */
#if !defined(_STRICT_POSIX) || (_POSIX_C_SOURCE > 2) || defined(_XPG5)
/* flags to mlockall */
diff --git a/usr/src/uts/common/sys/mntent.h b/usr/src/uts/common/sys/mntent.h
index 88c98dc5a4..7196f7b3ac 100644
--- a/usr/src/uts/common/sys/mntent.h
+++ b/usr/src/uts/common/sys/mntent.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2012, Joyent, Inc. All rights reserved.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*
* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T
@@ -47,6 +48,7 @@ extern "C" {
#define MNTTYPE_PCFS "pcfs" /* PC (MSDOS) file system */
#define MNTTYPE_PC MNTTYPE_PCFS /* Deprecated name; use MNTTYPE_PCFS */
#define MNTTYPE_LOFS "lofs" /* Loop back file system */
+#define MNTTYPE_HYPRLOFS "hyprlofs" /* Hyperlofs file system */
#define MNTTYPE_LO MNTTYPE_LOFS /* Deprecated name; use MNTTYPE_LOFS */
#define MNTTYPE_HSFS "hsfs" /* High Sierra (9660) file system */
#define MNTTYPE_SWAP "swap" /* Swap file system */
diff --git a/usr/src/uts/common/sys/netconfig.h b/usr/src/uts/common/sys/netconfig.h
index 6407534a3b..658f9f3f6b 100644
--- a/usr/src/uts/common/sys/netconfig.h
+++ b/usr/src/uts/common/sys/netconfig.h
@@ -28,6 +28,7 @@
*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_NETCONFIG_H
diff --git a/usr/src/uts/common/sys/neti.h b/usr/src/uts/common/sys/neti.h
index e7027f8ece..92bd5b897d 100644
--- a/usr/src/uts/common/sys/neti.h
+++ b/usr/src/uts/common/sys/neti.h
@@ -48,6 +48,8 @@ struct msgb; /* avoiding sys/stream.h here */
#define NHF_INET "NHF_INET"
#define NHF_INET6 "NHF_INET6"
#define NHF_ARP "NHF_ARP"
+#define NHF_VND_INET "NHF_VND_INET"
+#define NHF_VND_INET6 "NHF_VND_INET6"
#define NHF_VIONA "NHF_VIONA"
/*
diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h
index 7ee33318cd..b327e69fad 100644
--- a/usr/src/uts/common/sys/netstack.h
+++ b/usr/src/uts/common/sys/netstack.h
@@ -88,7 +88,8 @@ typedef id_t netstackid_t;
#define NS_IPSECESP 16
#define NS_IPNET 17
#define NS_ILB 18
-#define NS_MAX (NS_ILB+1)
+#define NS_VND 19
+#define NS_MAX (NS_VND+1)
/*
* State maintained for each module which tracks the state of
diff --git a/usr/src/uts/common/sys/param.h b/usr/src/uts/common/sys/param.h
index 282d84b912..66bd91f76f 100644
--- a/usr/src/uts/common/sys/param.h
+++ b/usr/src/uts/common/sys/param.h
@@ -116,7 +116,7 @@ extern "C" {
#define DEFAULT_MAXPID 999999
#define DEFAULT_JUMPPID 100000
#else
-#define DEFAULT_MAXPID 30000
+#define DEFAULT_MAXPID 99999
#define DEFAULT_JUMPPID 0
#endif
diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h
index d8983a28c4..9f1b80d390 100644
--- a/usr/src/uts/common/sys/policy.h
+++ b/usr/src/uts/common/sys/policy.h
@@ -108,6 +108,7 @@ int secpolicy_ipc_owner(const cred_t *, const struct kipc_perm *);
int secpolicy_kmdb(const cred_t *);
int secpolicy_lock_memory(const cred_t *);
int secpolicy_meminfo(const cred_t *);
+int secpolicy_fs_import(const cred_t *);
int secpolicy_modctl(const cred_t *, int);
int secpolicy_net(const cred_t *, int, boolean_t);
int secpolicy_net_bindmlp(const cred_t *);
@@ -176,6 +177,7 @@ int secpolicy_setid_setsticky_clear(vnode_t *, vattr_t *,
const vattr_t *, cred_t *);
int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t);
int secpolicy_xvm_control(const cred_t *);
+int secpolicy_hyprlofs_control(const cred_t *);
int secpolicy_basic_exec(const cred_t *, vnode_t *);
int secpolicy_basic_fork(const cred_t *);
diff --git a/usr/src/uts/common/sys/poll_impl.h b/usr/src/uts/common/sys/poll_impl.h
index ff277f89c8..388849a14f 100644
--- a/usr/src/uts/common/sys/poll_impl.h
+++ b/usr/src/uts/common/sys/poll_impl.h
@@ -141,6 +141,7 @@ struct pollstate {
pollstate_t *ps_contend_nextp; /* next in contender list */
pollstate_t **ps_contend_pnextp; /* pointer-to-previous-next */
int ps_flags; /* state flags */
+ short ps_implicit_ev; /* implicit poll event interest */
};
/* pollstate flags */
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index 06e5a8caf4..d05886d1fc 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -357,6 +357,7 @@ typedef struct proc {
struct zone *p_zone; /* zone in which process lives */
struct vnode *p_execdir; /* directory that p_exec came from */
struct brand *p_brand; /* process's brand */
+
void *p_brand_data; /* per-process brand state */
psecflags_t p_secflags; /* per-process security flags */
@@ -373,7 +374,6 @@ typedef struct proc {
*/
struct user p_user; /* (see sys/user.h) */
} proc_t;
-
#define PROC_T /* headers relying on proc_t are OK */
#ifdef _KERNEL
@@ -647,6 +647,7 @@ extern int signal_is_blocked(kthread_t *, int);
extern int sigcheck(proc_t *, kthread_t *);
extern void sigdefault(proc_t *);
+extern struct pid *pid_find(pid_t pid);
extern void pid_setmin(void);
extern pid_t pid_allocate(proc_t *, pid_t, int);
extern int pid_rele(struct pid *);
@@ -662,6 +663,7 @@ extern int sprtrylock_proc(proc_t *);
extern void sprwaitlock_proc(proc_t *);
extern void sprlock_proc(proc_t *);
extern void sprunlock(proc_t *);
+extern void sprunprlock(proc_t *);
extern void pid_init(void);
extern proc_t *pid_entry(int);
extern int pid_slot(proc_t *);
@@ -753,6 +755,10 @@ extern kthread_t *thread_unpin(void);
extern void thread_init(void);
extern void thread_load(kthread_t *, void (*)(), caddr_t, size_t);
+extern void thread_splitstack(void (*)(void *), void *, size_t);
+extern void thread_splitstack_run(caddr_t, void (*)(void *), void *);
+extern void thread_splitstack_cleanup(void);
+
extern void tsd_create(uint_t *, void (*)(void *));
extern void tsd_destroy(uint_t *);
extern void *tsd_getcreate(uint_t *, void (*)(void *), void *(*)(void));
@@ -794,7 +800,7 @@ extern void pokelwps(proc_t *);
extern void continuelwps(proc_t *);
extern int exitlwps(int);
extern void lwp_ctmpl_copy(klwp_t *, klwp_t *);
-extern void lwp_ctmpl_clear(klwp_t *);
+extern void lwp_ctmpl_clear(klwp_t *, boolean_t);
extern klwp_t *forklwp(klwp_t *, proc_t *, id_t);
extern void lwp_load(klwp_t *, gregset_t, uintptr_t);
extern void lwp_setrval(klwp_t *, int, int);
diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h
index 00ba23594e..3d6760a7b4 100644
--- a/usr/src/uts/common/sys/procfs.h
+++ b/usr/src/uts/common/sys/procfs.h
@@ -25,7 +25,7 @@
*/
/*
* Copyright 2012 DEY Storage Systems, Inc. All rights reserved.
- * Copyright 2018 Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
* Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
@@ -237,6 +237,7 @@ typedef struct pstatus {
#define PR_FAULTED 6
#define PR_SUSPENDED 7
#define PR_CHECKPOINT 8
+#define PR_BRAND 9
/*
* lwp ps(1) information file. /proc/<pid>/lwp/<lwpid>/lwpsinfo
@@ -271,10 +272,12 @@ typedef struct lwpsinfo {
int pr_filler[4]; /* reserved for future use */
} lwpsinfo_t;
+#define PRARGSZ 80 /* number of chars of arguments */
+#define PRMAXARGVLEN 4096 /* max len of /proc/%s/argv */
+
/*
* process ps(1) information file. /proc/<pid>/psinfo
*/
-#define PRARGSZ 80 /* number of chars of arguments */
typedef struct psinfo {
int pr_flag; /* process flags (DEPRECATED; do not use) */
int pr_nlwp; /* number of active lwps in the process */
diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h
index 23594fdc13..52d69b3416 100644
--- a/usr/src/uts/common/sys/ptms.h
+++ b/usr/src/uts/common/sys/ptms.h
@@ -125,6 +125,12 @@ extern void ptms_logp(char *, uintptr_t);
#define DDBGP(a, b)
#endif
+typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t;
+typedef struct ptmptsopencb {
+ boolean_t (*ppocb_func)(ptmptsopencb_arg_t);
+ ptmptsopencb_arg_t ppocb_arg;
+} ptmptsopencb_t;
+
#endif /* _KERNEL */
typedef struct pt_own {
@@ -160,6 +166,19 @@ typedef struct pt_own {
#define ZONEPT (('P'<<8)|4) /* set zone of manager/subsidiary pair */
#define OWNERPT (('P'<<8)|5) /* set owner/group for subsidiary */
+#ifdef _KERNEL
+/*
+ * kernel ioctl commands
+ *
+ * PTMPTSOPENCB: Returns a callback function pointer and opaque argument.
+ * The return value of the callback function when it's invoked
+ * with the opaque argument passed to it will indicate if the
+ * pts slave device is currently open.
+ */
+#define PTMPTSOPENCB (('P'<<8)|6) /* check if the slave is open */
+
+#endif /* _KERNEL */
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/refhash.h b/usr/src/uts/common/sys/refhash.h
index b7427a454d..469cb6d686 100644
--- a/usr/src/uts/common/sys/refhash.h
+++ b/usr/src/uts/common/sys/refhash.h
@@ -19,6 +19,10 @@
#include <sys/types.h>
#include <sys/list.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
#define RHL_F_DEAD 0x01
typedef struct refhash_link {
@@ -58,4 +62,8 @@ extern void *refhash_first(refhash_t *);
extern void *refhash_next(refhash_t *, void *);
extern boolean_t refhash_obj_valid(refhash_t *hp, const void *);
+#ifdef __cplusplus
+}
+#endif
+
#endif /* _SYS_REFHASH_H */
diff --git a/usr/src/uts/common/sys/resource.h b/usr/src/uts/common/sys/resource.h
index 13166f378d..d65ca00f69 100644
--- a/usr/src/uts/common/sys/resource.h
+++ b/usr/src/uts/common/sys/resource.h
@@ -23,6 +23,7 @@
*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2017 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
diff --git a/usr/src/uts/common/sys/rt.h b/usr/src/uts/common/sys/rt.h
index d4233aecb5..2ed7320a09 100644
--- a/usr/src/uts/common/sys/rt.h
+++ b/usr/src/uts/common/sys/rt.h
@@ -22,6 +22,7 @@
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -75,6 +76,16 @@ typedef struct rtkparms {
int rt_tqsig; /* real-time time quantum signal */
uint_t rt_cflags; /* real-time control flags */
} rtkparms_t;
+
+#define RTGPPRIO0 100 /* Global priority for RT priority 0 */
+
+/*
+ * control flags (kparms->rt_cflags).
+ */
+#define RT_DOPRI 0x01 /* change priority */
+#define RT_DOTQ 0x02 /* change RT time quantum */
+#define RT_DOSIG 0x04 /* change RT time quantum signal */
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mpi/mpi2_pci.h b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mpi/mpi2_pci.h
new file mode 100644
index 0000000000..afb7a94c58
--- /dev/null
+++ b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mpi/mpi2_pci.h
@@ -0,0 +1,147 @@
+/*-
+ * Copyright (c) 2012-2015 LSI Corp.
+ * Copyright (c) 2013-2016 Avago Technologies
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2000-2015 LSI Corporation.
+ * Copyright (c) 2013-2016 Avago Technologies
+ * All rights reserved.
+ *
+ *
+ * Name: mpi2_pci.h
+ * Title: MPI PCIe Attached Devices structures and definitions.
+ * Creation Date: October 9, 2012
+ *
+ * mpi2_pci.h Version: 02.00.02
+ *
+ * NOTE: Names (typedefs, defines, etc.) beginning with an MPI25 or Mpi25
+ * prefix are for use only on MPI v2.5 products, and must not be used
+ * with MPI v2.0 products. Unless otherwise noted, names beginning with
+ * MPI2 or Mpi2 are for use with both MPI v2.0 and MPI v2.5 products.
+ *
+ * Version History
+ * ---------------
+ *
+ * Date Version Description
+ * -------- -------- ------------------------------------------------------
+ * 03-16-15 02.00.00 Initial version.
+ * 02-17-16 02.00.01 Removed AHCI support.
+ * Removed SOP support.
+ * 07-01-16 02.00.02 Added MPI26_NVME_FLAGS_FORCE_ADMIN_ERR_RESP to
+ * NVME Encapsulated Request.
+ * --------------------------------------------------------------------------
+ */
+
+#ifndef MPI2_PCI_H
+#define MPI2_PCI_H
+
+
+/*
+ * Values for the PCIe DeviceInfo field used in PCIe Device Status Change Event
+ * data and PCIe Configuration pages.
+ */
+#define MPI26_PCIE_DEVINFO_DIRECT_ATTACH (0x00000010)
+
+#define MPI26_PCIE_DEVINFO_MASK_DEVICE_TYPE (0x0000000F)
+#define MPI26_PCIE_DEVINFO_NO_DEVICE (0x00000000)
+#define MPI26_PCIE_DEVINFO_PCI_SWITCH (0x00000001)
+#define MPI26_PCIE_DEVINFO_NVME (0x00000003)
+
+
+/****************************************************************************
+* NVMe Encapsulated message
+****************************************************************************/
+
+/* NVME Encapsulated Request Message */
+typedef struct _MPI26_NVME_ENCAPSULATED_REQUEST
+{
+ U16 DevHandle; /* 0x00 */
+ U8 ChainOffset; /* 0x02 */
+ U8 Function; /* 0x03 */
+ U16 EncapsulatedCommandLength; /* 0x04 */
+ U8 Reserved1; /* 0x06 */
+ U8 MsgFlags; /* 0x07 */
+ U8 VP_ID; /* 0x08 */
+ U8 VF_ID; /* 0x09 */
+ U16 Reserved2; /* 0x0A */
+ U32 Reserved3; /* 0x0C */
+ U64 ErrorResponseBaseAddress; /* 0x10 */
+ U16 ErrorResponseAllocationLength; /* 0x18 */
+ U16 Flags; /* 0x1A */
+ U32 DataLength; /* 0x1C */
+ U8 NVMe_Command[4]; /* 0x20 */ /* variable length */
+
+} MPI26_NVME_ENCAPSULATED_REQUEST, MPI2_POINTER PTR_MPI26_NVME_ENCAPSULATED_REQUEST,
+ Mpi26NVMeEncapsulatedRequest_t, MPI2_POINTER pMpi26NVMeEncapsulatedRequest_t;
+
+/* defines for the Flags field */
+#define MPI26_NVME_FLAGS_FORCE_ADMIN_ERR_RESP (0x0020)
+/* Submission Queue Type*/
+#define MPI26_NVME_FLAGS_SUBMISSIONQ_MASK (0x0010)
+#define MPI26_NVME_FLAGS_SUBMISSIONQ_IO (0x0000)
+#define MPI26_NVME_FLAGS_SUBMISSIONQ_ADMIN (0x0010)
+/* Error Response Address Space */
+#define MPI26_NVME_FLAGS_MASK_ERROR_RSP_ADDR (0x000C)
+#define MPI26_NVME_FLAGS_SYSTEM_RSP_ADDR (0x0000)
+#define MPI26_NVME_FLAGS_IOCPLB_RSP_ADDR (0x0008)
+#define MPI26_NVME_FLAGS_IOCPLBNTA_RSP_ADDR (0x000C)
+/* Data Direction*/
+#define MPI26_NVME_FLAGS_DATADIRECTION_MASK (0x0003)
+#define MPI26_NVME_FLAGS_NODATATRANSFER (0x0000)
+#define MPI26_NVME_FLAGS_WRITE (0x0001)
+#define MPI26_NVME_FLAGS_READ (0x0002)
+#define MPI26_NVME_FLAGS_BIDIRECTIONAL (0x0003)
+
+
+/* NVMe Encapuslated Reply Message */
+typedef struct _MPI26_NVME_ENCAPSULATED_ERROR_REPLY
+{
+ U16 DevHandle; /* 0x00 */
+ U8 MsgLength; /* 0x02 */
+ U8 Function; /* 0x03 */
+ U16 EncapsulatedCommandLength; /* 0x04 */
+ U8 Reserved1; /* 0x06 */
+ U8 MsgFlags; /* 0x07 */
+ U8 VP_ID; /* 0x08 */
+ U8 VF_ID; /* 0x09 */
+ U16 Reserved2; /* 0x0A */
+ U16 Reserved3; /* 0x0C */
+ U16 IOCStatus; /* 0x0E */
+ U32 IOCLogInfo; /* 0x10 */
+ U16 ErrorResponseCount; /* 0x14 */
+ U16 Reserved4; /* 0x16 */
+} MPI26_NVME_ENCAPSULATED_ERROR_REPLY,
+ MPI2_POINTER PTR_MPI26_NVME_ENCAPSULATED_ERROR_REPLY,
+ Mpi26NVMeEncapsulatedErrorReply_t,
+ MPI2_POINTER pMpi26NVMeEncapsulatedErrorReply_t;
+
+
+#endif
+
+
diff --git a/usr/src/uts/common/sys/scsi/generic/inquiry.h b/usr/src/uts/common/sys/scsi/generic/inquiry.h
index ddfd683169..fcbf00d5dc 100644
--- a/usr/src/uts/common/sys/scsi/generic/inquiry.h
+++ b/usr/src/uts/common/sys/scsi/generic/inquiry.h
@@ -362,7 +362,8 @@ struct scsi_inquiry {
#define DTYPE_NOTPRESENT (DPQ_NEVER | DTYPE_UNKNOWN)
/*
- * Defined Response Data Formats:
+ * Defined Versions for inquiry data. These represent the base version that a
+ * device supports.
*/
#define RDF_LEVEL0 0x00 /* no conformance claim (SCSI-1) */
#define RDF_CCS 0x01 /* Obsolete (pseudo-spec) */
@@ -370,7 +371,8 @@ struct scsi_inquiry {
#define RDF_SCSI_SPC 0x03 /* ANSI INCITS 301-1997 (SPC) */
#define RDF_SCSI_SPC2 0x04 /* ANSI INCITS 351-2001 (SPC-2) */
#define RDF_SCSI_SPC3 0x05 /* ANSI INCITS 408-2005 (SPC-3) */
-#define RDF_SCSI_SPC4 0x06 /* t10 (SPC-4) */
+#define RDF_SCSI_SPC4 0x06 /* ANSI INCITS 513-2015 (SPC-4) */
+#define RDF_SCSI_SPC5 0x07 /* t10 (SPC-5) */
/*
* Defined Target Port Group Select values:
@@ -436,6 +438,7 @@ struct vpd_desc {
#define PM_CAPABLE_SPC2 RDF_SCSI_SPC2
#define PM_CAPABLE_SPC3 RDF_SCSI_SPC3
#define PM_CAPABLE_SPC4 RDF_SCSI_SPC4
+#define PM_CAPABLE_SPC5 RDF_SCSI_SPC5
#define PM_CAPABLE_LOG_MASK 0xffff0000 /* use upper 16 bit to */
/* indicate log specifics */
#define PM_CAPABLE_LOG_SUPPORTED 0x10000 /* Log page 0xE might be */
diff --git a/usr/src/uts/common/sys/scsi/targets/sddef.h b/usr/src/uts/common/sys/scsi/targets/sddef.h
index d28918d9c5..bb522141af 100644
--- a/usr/src/uts/common/sys/scsi/targets/sddef.h
+++ b/usr/src/uts/common/sys/scsi/targets/sddef.h
@@ -763,6 +763,12 @@ _NOTE(MUTEX_PROTECTS_DATA(sd_lun::un_fi_mutex,
#define SD_FM_LOG(un) (((struct sd_fm_internal *)\
((un)->un_fm_private))->fm_log_level)
+/*
+ * Version Related Macros
+ */
+#define SD_SCSI_VERS_IS_GE_SPC_4(un) \
+ (SD_INQUIRY(un)->inq_ansi == RDF_SCSI_SPC4 || \
+ SD_INQUIRY(un)->inq_ansi == RDF_SCSI_SPC5)
/*
* Values for un_ctype
@@ -1821,6 +1827,10 @@ struct sd_fm_internal {
#define SD_PM_CAPABLE_IS_SPC_4(pm_cap) \
((pm_cap & PM_CAPABLE_PM_MASK) == PM_CAPABLE_SPC4)
+#define SD_PM_CAPABLE_IS_GE_SPC_4(pm_cap) \
+ (((pm_cap & PM_CAPABLE_PM_MASK) == PM_CAPABLE_SPC4) || \
+ ((pm_cap & PM_CAPABLE_PM_MASK) == PM_CAPABLE_SPC5))
+
#define SD_PM_CAP_LOG_SUPPORTED(pm_cap) \
((pm_cap & PM_CAPABLE_LOG_SUPPORTED) ? TRUE : FALSE)
diff --git a/usr/src/uts/common/sys/shm.h b/usr/src/uts/common/sys/shm.h
index 0219fc2cf7..8f530afda2 100644
--- a/usr/src/uts/common/sys/shm.h
+++ b/usr/src/uts/common/sys/shm.h
@@ -21,6 +21,7 @@
*/
/*
* Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2016 Joyent, Inc.
*
* Copyright 2003 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
@@ -120,6 +121,10 @@ struct shmid_ds {
#define SHM_LOCK 3 /* Lock segment in core */
#define SHM_UNLOCK 4 /* Unlock segment */
+#if defined(_KERNEL)
+#define SHM_RMID 5 /* Private RMID for lx support */
+#endif
+
#if !defined(_KERNEL)
int shmget(key_t, size_t, int);
int shmids(int *, uint_t, uint_t *);
diff --git a/usr/src/uts/common/sys/shm_impl.h b/usr/src/uts/common/sys/shm_impl.h
index 4d8cdcede5..1eae2ca0a4 100644
--- a/usr/src/uts/common/sys/shm_impl.h
+++ b/usr/src/uts/common/sys/shm_impl.h
@@ -21,13 +21,12 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2016 Joyent, Inc.
*/
#ifndef _SYS_SHM_IMPL_H
#define _SYS_SHM_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/ipc_impl.h>
#if defined(_KERNEL) || defined(_KMEMUSER)
#include <sys/shm.h>
@@ -70,7 +69,11 @@ typedef struct kshmid {
time_t shm_ctime; /* last change time */
struct sptinfo *shm_sptinfo; /* info about ISM segment */
struct seg *shm_sptseg; /* pointer to ISM segment */
- long shm_sptprot; /* was reserved (still a "long") */
+ ulong_t shm_opts;
+ /*
+ * Composed of: sptprot (uchar_t) and
+ * RM_PENDING flag (1 bit).
+ */
} kshmid_t;
/*
@@ -78,6 +81,14 @@ typedef struct kshmid {
*/
#define SHMSA_ISM 1 /* uses shared page table */
+/*
+ * shm_opts definitions
+ * Low byte in shm_opts is used for sptprot (see PROT_ALL). The upper bits are
+ * used for additional options.
+ */
+#define SHM_PROT_MASK 0xff
+#define SHM_RM_PENDING 0x100
+
typedef struct sptinfo {
struct as *sptas; /* dummy as ptr. for spt segment */
} sptinfo_t;
diff --git a/usr/src/uts/common/sys/signal.h b/usr/src/uts/common/sys/signal.h
index aece147bec..b12dff6034 100644
--- a/usr/src/uts/common/sys/signal.h
+++ b/usr/src/uts/common/sys/signal.h
@@ -22,6 +22,7 @@
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015, Joyent, Inc.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -158,8 +159,8 @@ struct sigaction32 {
* use of these symbols by applications is injurious
* to binary compatibility
*/
-#define NSIG 74 /* valid signals range from 1 to NSIG-1 */
-#define MAXSIG 73 /* size of u_signal[], NSIG-1 <= MAXSIG */
+#define NSIG 75 /* valid signals range from 1 to NSIG-1 */
+#define MAXSIG 74 /* size of u_signal[], NSIG-1 <= MAXSIG */
#endif /* defined(__EXTENSIONS__) || !defined(_XPG4_2) */
#define MINSIGSTKSZ 2048
diff --git a/usr/src/uts/common/sys/socket.h b/usr/src/uts/common/sys/socket.h
index 9e61bc7bb0..25880522e9 100644
--- a/usr/src/uts/common/sys/socket.h
+++ b/usr/src/uts/common/sys/socket.h
@@ -40,6 +40,9 @@
/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */
+/*
+ * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ */
#ifndef _SYS_SOCKET_H
#define _SYS_SOCKET_H
@@ -205,6 +208,7 @@ struct so_snd_bufinfo {
#define SO_SRCADDR 0x2001 /* Internal: AF_UNIX source address */
#define SO_FILEP 0x2002 /* Internal: AF_UNIX file pointer */
#define SO_UNIX_CLOSE 0x2003 /* Internal: AF_UNIX peer closed */
+#define SO_REUSEPORT 0x2004 /* allow simultaneous port reuse */
#endif /* _KERNEL */
/*
@@ -304,8 +308,9 @@ struct linger {
#define AF_INET_OFFLOAD 30 /* Sun private; do not use */
#define AF_TRILL 31 /* TRILL interface */
#define AF_PACKET 32 /* PF_PACKET Linux socket interface */
+#define AF_LX_NETLINK 33 /* Linux-compatible netlink */
-#define AF_MAX 32
+#define AF_MAX 33
/*
* Protocol families, same as address families for now.
@@ -345,6 +350,7 @@ struct linger {
#define PF_INET_OFFLOAD AF_INET_OFFLOAD /* Sun private; do not use */
#define PF_TRILL AF_TRILL
#define PF_PACKET AF_PACKET
+#define PF_LX_NETLINK AF_LX_NETLINK
#define PF_MAX AF_MAX
diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h
index 479641a11b..1e48b00dd7 100644
--- a/usr/src/uts/common/sys/socketvar.h
+++ b/usr/src/uts/common/sys/socketvar.h
@@ -304,15 +304,16 @@ struct sonode {
#define SS_OOBPEND 0x00002000 /* OOB pending or present - poll */
#define SS_HAVEOOBDATA 0x00004000 /* OOB data present */
#define SS_HADOOBDATA 0x00008000 /* OOB data consumed */
-#define SS_CLOSING 0x00010000 /* in process of closing */
+#define SS_CLOSING 0x00010000 /* in process of closing */
#define SS_FIL_DEFER 0x00020000 /* filter deferred notification */
#define SS_FILOP_OK 0x00040000 /* socket can attach filters */
#define SS_FIL_RCV_FLOWCTRL 0x00080000 /* filter asserted rcv flow ctrl */
+
#define SS_FIL_SND_FLOWCTRL 0x00100000 /* filter asserted snd flow ctrl */
#define SS_FIL_STOP 0x00200000 /* no more filter actions */
-
#define SS_SODIRECT 0x00400000 /* transport supports sodirect */
+#define SS_FILOP_UNSF 0x00800000 /* block attaching unsafe filters */
#define SS_SENTLASTREADSIG 0x01000000 /* last rx signal has been sent */
#define SS_SENTLASTWRITESIG 0x02000000 /* last tx signal has been sent */
@@ -328,7 +329,8 @@ struct sonode {
/*
* Sockets that can fall back to TPI must ensure that fall back is not
- * initiated while a thread is using a socket.
+ * initiated while a thread is using a socket. Otherwise this disables all
+ * future filter attachment.
*/
#define SO_BLOCK_FALLBACK(so, fn) \
ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \
@@ -344,6 +346,24 @@ struct sonode {
} \
}
+/*
+ * Sockets that can fall back to TPI must ensure that fall back is not
+ * initiated while a thread is using a socket. Otherwise this disables all
+ * future unsafe filter attachment. Safe filters can still attach after
+ * we execute the function in which this macro is used.
+ */
+#define SO_BLOCK_FALLBACK_SAFE(so, fn) \
+ ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \
+ rw_enter(&(so)->so_fallback_rwlock, RW_READER); \
+ if ((so)->so_state & SS_FALLBACK_COMP) { \
+ rw_exit(&(so)->so_fallback_rwlock); \
+ return (fn); \
+ } else if (((so)->so_state & SS_FILOP_UNSF) == 0) { \
+ mutex_enter(&(so)->so_lock); \
+ (so)->so_state |= SS_FILOP_UNSF; \
+ mutex_exit(&(so)->so_lock); \
+ }
+
#define SO_UNBLOCK_FALLBACK(so) { \
rw_exit(&(so)->so_fallback_rwlock); \
}
@@ -375,6 +395,7 @@ struct sonode {
/* The modes below are only for non-streams sockets */
#define SM_ACCEPTSUPP 0x400 /* can handle accept() */
#define SM_SENDFILESUPP 0x800 /* Private: proto supp sendfile */
+#define SM_DEFERERR 0x1000 /* Private: defer so_error delivery */
/*
* Socket versions. Used by the socket library when calling _so_socket().
diff --git a/usr/src/uts/common/sys/sockfilter.h b/usr/src/uts/common/sys/sockfilter.h
index 9f6d8b499b..c4dd6539de 100644
--- a/usr/src/uts/common/sys/sockfilter.h
+++ b/usr/src/uts/common/sys/sockfilter.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
#ifndef _SYS_SOCKFILTER_H
@@ -129,6 +130,15 @@ typedef struct sof_ops {
#define SOF_VERSION 1
+/*
+ * Flag indicating that the filter module is safe to attach after bind,
+ * getsockname, getsockopt or setsockopt calls. By default filters are unsafe
+ * so may not be attached after any socket operation. However, a safe filter
+ * can still be attached after one of the above calls. This makes attaching
+ * the filter less dependent on the initial socket setup order.
+ */
+#define SOF_ATT_SAFE 0x1
+
extern int sof_register(int, const char *, const sof_ops_t *, int);
extern int sof_unregister(const char *);
diff --git a/usr/src/uts/common/sys/squeue.h b/usr/src/uts/common/sys/squeue.h
index 040963eef7..89b355970e 100644
--- a/usr/src/uts/common/sys/squeue.h
+++ b/usr/src/uts/common/sys/squeue.h
@@ -30,6 +30,17 @@
extern "C" {
#endif
+/*
+ * Originally in illumos, we had an IP-centric view of the serialization queue
+ * abstraction. While that has useful properties, the implementation of squeues
+ * hardcodes various parts of the implementation of IP into it which makes it
+ * unsuitable for other consumers. To enable them, we created another interface,
+ * but opted not to port all of the functionality that IP uses in the form of
+ * ip_squeue.c As other consumers need the functionality that IP has in squeues,
+ * then we'll come up with more genericized methods and add that functionality
+ * to <sys/gsqueue.h>. Please do not continue to use this header.
+ */
+
#include <sys/types.h>
#include <sys/processor.h>
#include <sys/stream.h>
@@ -77,12 +88,13 @@ typedef enum {
struct ip_recv_attr_s;
extern void squeue_init(void);
-extern squeue_t *squeue_create(pri_t);
+extern squeue_t *squeue_create(pri_t, boolean_t);
extern void squeue_bind(squeue_t *, processorid_t);
extern void squeue_unbind(squeue_t *);
extern void squeue_enter(squeue_t *, mblk_t *, mblk_t *,
uint32_t, struct ip_recv_attr_s *, int, uint8_t);
extern uintptr_t *squeue_getprivate(squeue_t *, sqprivate_t);
+extern void squeue_destroy(squeue_t *);
struct conn_s;
extern int squeue_synch_enter(struct conn_s *, mblk_t *);
diff --git a/usr/src/uts/common/sys/squeue_impl.h b/usr/src/uts/common/sys/squeue_impl.h
index 8eb6a30add..2bb717fb52 100644
--- a/usr/src/uts/common/sys/squeue_impl.h
+++ b/usr/src/uts/common/sys/squeue_impl.h
@@ -114,6 +114,7 @@ struct squeue_s {
squeue_set_t *sq_set; /* managed by squeue creator */
pri_t sq_priority; /* squeue thread priority */
+ boolean_t sq_isip; /* use IP-centric features */
/* Keep the debug-only fields at the end of the structure */
#ifdef DEBUG
@@ -161,6 +162,7 @@ struct squeue_s {
#define SQS_POLL_RESTART_DONE 0x01000000
#define SQS_POLL_THR_QUIESCE 0x02000000
#define SQS_PAUSE 0x04000000 /* The squeue has been paused */
+#define SQS_EXIT 0x08000000 /* squeue is being torn down */
#define SQS_WORKER_THR_CONTROL \
(SQS_POLL_QUIESCE | SQS_POLL_RESTART | SQS_POLL_CLEANUP)
diff --git a/usr/src/uts/common/sys/stream.h b/usr/src/uts/common/sys/stream.h
index ea2c3d8e9a..7d118b09e8 100644
--- a/usr/src/uts/common/sys/stream.h
+++ b/usr/src/uts/common/sys/stream.h
@@ -21,6 +21,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2015 Joyent, Inc. All rights reserved.
* Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
* Copyright 2015 Joyent, Inc. All rights reserved.
* Copyright 2022 Garrett D'Amore
diff --git a/usr/src/uts/common/sys/sunddi.h b/usr/src/uts/common/sys/sunddi.h
index 442595289f..c0dedf555c 100644
--- a/usr/src/uts/common/sys/sunddi.h
+++ b/usr/src/uts/common/sys/sunddi.h
@@ -1599,8 +1599,14 @@ int
ddi_ffs(long mask);
int
+ddi_ffsll(long long mask);
+
+int
ddi_fls(long mask);
+int
+ddi_flsll(long long mask);
+
/*
* The ddi_soft_state* routines comprise generic storage management utilities
* for driver soft state structures. Two types of soft_state indexes are
diff --git a/usr/src/uts/common/sys/systrace.h b/usr/src/uts/common/sys/systrace.h
index d43974451e..17e509d4d8 100644
--- a/usr/src/uts/common/sys/systrace.h
+++ b/usr/src/uts/common/sys/systrace.h
@@ -22,13 +22,12 @@
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2014 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_SYSTRACE_H
#define _SYS_SYSTRACE_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/dtrace.h>
#ifdef __cplusplus
@@ -47,16 +46,18 @@ extern systrace_sysent_t *systrace_sysent;
extern systrace_sysent_t *systrace_sysent32;
extern void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
extern void systrace_stub(dtrace_id_t, uintptr_t, uintptr_t,
- uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+ uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
extern int64_t dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1,
- uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5);
+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5,
+ uintptr_t arg6, uintptr_t arg7);
#ifdef _SYSCALL32_IMPL
extern int64_t dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1,
- uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5);
+ uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5,
+ uintptr_t arg6, uintptr_t arg7);
#endif
#endif
diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h
index 39106a14fc..4edeb7a41c 100644
--- a/usr/src/uts/common/sys/termios.h
+++ b/usr/src/uts/common/sys/termios.h
@@ -363,6 +363,24 @@ extern pid_t tcgetsid(int);
#define TCSETSF (_TIOC|16)
/*
+ * linux terminal ioctls we need to be aware of
+ */
+#define TIOCSETLD (_TIOC|123) /* set line discipline parms */
+#define TIOCGETLD (_TIOC|124) /* get line discipline parms */
+
+/*
+ * The VMIN and VTIME and solaris overlap with VEOF and VEOL - This is
+ * perfectly legal except, linux expects them to be separate. So we keep
+ * them separately.
+ */
+struct lx_cc {
+ unsigned char veof; /* veof value */
+ unsigned char veol; /* veol value */
+ unsigned char vmin; /* vmin value */
+ unsigned char vtime; /* vtime value */
+};
+
+/*
* NTP PPS ioctls
*/
#define TIOCGPPS (_TIOC|125)
diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h
index 53a31c848c..76e6835349 100644
--- a/usr/src/uts/common/sys/thread.h
+++ b/usr/src/uts/common/sys/thread.h
@@ -375,7 +375,7 @@ typedef struct _kthread {
#define T_WOULDBLOCK 0x0020 /* for lockfs */
#define T_DONTBLOCK 0x0040 /* for lockfs */
#define T_DONTPEND 0x0080 /* for lockfs */
-#define T_SYS_PROF 0x0100 /* profiling on for duration of system call */
+#define T_SPLITSTK 0x0100 /* kernel stack is currently split */
#define T_WAITCVSEM 0x0200 /* waiting for a lwp_cv or lwp_sema on sleepq */
#define T_WATCHPT 0x0400 /* thread undergoing a watchpoint emulation */
#define T_PANIC 0x0800 /* thread initiated a system panic */
@@ -427,8 +427,9 @@ typedef struct _kthread {
#define TS_RESUME 0x1000 /* setrun() by CPR resume process */
#define TS_CREATE 0x2000 /* setrun() by syslwp_create() */
#define TS_RUNQMATCH 0x4000 /* exact run queue balancing by setbackdq() */
+#define TS_BSTART 0x8000 /* setrun() by brand */
#define TS_ALLSTART \
- (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE)
+ (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE|TS_BSTART)
#define TS_ANYWAITQ (TS_PROJWAITQ|TS_ZONEWAITQ)
/*
@@ -456,6 +457,10 @@ typedef struct _kthread {
#define ISTOPPED(t) ((t)->t_state == TS_STOPPED && \
!((t)->t_schedflag & TS_PSTART))
+/* True if thread is stopped for a brand-specific reason */
+#define BSTOPPED(t) ((t)->t_state == TS_STOPPED && \
+ !((t)->t_schedflag & TS_BSTART))
+
/* True if thread is asleep and wakeable */
#define ISWAKEABLE(t) (((t)->t_state == TS_SLEEP && \
((t)->t_flag & T_WAKEABLE)))
diff --git a/usr/src/uts/common/sys/time.h b/usr/src/uts/common/sys/time.h
index 634d5fb3a6..d82508e6b3 100644
--- a/usr/src/uts/common/sys/time.h
+++ b/usr/src/uts/common/sys/time.h
@@ -15,6 +15,7 @@
* Use is subject to license terms.
*
* Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2016 Joyent, Inc.
*
* Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
*/
@@ -266,6 +267,14 @@ typedef longlong_t hrtime_t;
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
+/*
+ * Unsigned counterpart to hrtime_t
+ */
+typedef u_longlong_t uhrtime_t;
+
+#define HRTIME_MAX LLONG_MAX
+#define UHRTIME_MAX ULLONG_MAX
+
#include <sys/time_impl.h>
#include <sys/mutex.h>
diff --git a/usr/src/uts/common/sys/timer.h b/usr/src/uts/common/sys/timer.h
index 4bbc5b4fb8..db27960413 100644
--- a/usr/src/uts/common/sys/timer.h
+++ b/usr/src/uts/common/sys/timer.h
@@ -35,6 +35,8 @@
#include <sys/proc.h>
#include <sys/thread.h>
#include <sys/param.h>
+#include <sys/siginfo.h>
+#include <sys/port.h>
#ifdef __cplusplus
extern "C" {
@@ -65,6 +67,7 @@ extern int timer_max;
*/
#define IT_SIGNAL 0x01
#define IT_PORT 0x02 /* use event port notification */
+#define IT_CALLBACK 0x04 /* custom callback function */
struct clock_backend;
@@ -92,14 +95,27 @@ struct itimer {
struct clock_backend *it_backend;
void (*it_fire)(itimer_t *);
kmutex_t it_mutex;
- void *it_portev; /* port_kevent_t pointer */
- void *it_portsrc; /* port_source_t pointer */
- int it_portfd; /* port file descriptor */
+ union {
+ struct {
+ void *_it_portev; /* port_kevent_t pointer */
+ void *_it_portsrc; /* port_source_t pointer */
+ int _it_portfd; /* port file descriptor */
+ } _it_ev_port;
+ struct {
+ void (*_it_cb_func)(itimer_t *);
+ uintptr_t _it_cb_data[2];
+ } _it_ev_cb;
+ } _it_ev_data;
};
#define it_sigq __data.__proc.__it_sigq
#define it_lwp __data.__proc.__it_lwp
#define it_frontend __data.__it_frontend
+#define it_portev _it_ev_data._it_ev_port._it_portev
+#define it_portsrc _it_ev_data._it_ev_port._it_portsrc
+#define it_portfd _it_ev_data._it_ev_port._it_portfd
+#define it_cb_func _it_ev_data._it_ev_cb._it_cb_func
+#define it_cb_data _it_ev_data._it_ev_cb._it_cb_data
typedef struct clock_backend {
struct sigevent clk_default;
@@ -116,7 +132,11 @@ typedef struct clock_backend {
extern void clock_add_backend(clockid_t clock, clock_backend_t *backend);
extern clock_backend_t *clock_get_backend(clockid_t clock);
+extern void timer_release(struct proc *, itimer_t *);
+extern void timer_delete_grabbed(struct proc *, timer_t tid, itimer_t *it);
extern void timer_lwpbind();
+extern int timer_setup(clock_backend_t *, struct sigevent *, port_notify_t *,
+ itimer_t **, timer_t *);
extern void timer_func(sigqueue_t *);
extern void timer_exit(void);
diff --git a/usr/src/uts/common/sys/ts.h b/usr/src/uts/common/sys/ts.h
index 7949058565..2cf5dcade3 100644
--- a/usr/src/uts/common/sys/ts.h
+++ b/usr/src/uts/common/sys/ts.h
@@ -79,6 +79,8 @@ typedef struct tsproc {
} tsproc_t;
/* flags */
+
+/* Formerly: TSKPRI 0x01 - thread at kernel mode priority */
#define TSBACKQ 0x02 /* thread goes to back of dispq if preempted */
#define TSIA 0x04 /* thread is interactive */
#define TSIASET 0x08 /* interactive thread is "on" */
diff --git a/usr/src/uts/common/sys/uadmin.h b/usr/src/uts/common/sys/uadmin.h
index 904b52cac4..75d000b831 100644
--- a/usr/src/uts/common/sys/uadmin.h
+++ b/usr/src/uts/common/sys/uadmin.h
@@ -23,6 +23,7 @@
*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2011 Joyent, Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -159,7 +160,7 @@ extern kmutex_t ualock;
extern void mdboot(int, int, char *, boolean_t);
extern void mdpreboot(int, int, char *);
extern int kadmin(int, int, void *, cred_t *);
-extern void killall(zoneid_t);
+extern void killall(zoneid_t, boolean_t);
#endif
extern int uadmin(int, int, uintptr_t);
diff --git a/usr/src/uts/common/sys/uio.h b/usr/src/uts/common/sys/uio.h
index bca1ed1fa3..9584be559f 100644
--- a/usr/src/uts/common/sys/uio.h
+++ b/usr/src/uts/common/sys/uio.h
@@ -145,7 +145,8 @@ typedef struct uioa_s {
*/
typedef enum xuio_type {
UIOTYPE_ASYNCIO,
- UIOTYPE_ZEROCOPY
+ UIOTYPE_ZEROCOPY,
+ UIOTYPE_PEEKSIZE
} xuio_type_t;
typedef struct xuio {
@@ -175,6 +176,15 @@ typedef struct xuio {
int xu_zc_rw; /* read or write buffer */
void *xu_zc_priv; /* fs specific */
} xu_zc;
+
+ /*
+ * Peek Size Support -- facilitate peeking at the size of a
+ * waiting message on a socket.
+ */
+ struct {
+ ssize_t xu_ps_size; /* size of waiting msg */
+ boolean_t xu_ps_set; /* was size calculated? */
+ } xu_ps;
} xu_ext;
} xuio_t;
diff --git a/usr/src/uts/common/sys/user.h b/usr/src/uts/common/sys/user.h
index 7f54dcf3ab..90fde4ef98 100644
--- a/usr/src/uts/common/sys/user.h
+++ b/usr/src/uts/common/sys/user.h
@@ -26,7 +26,7 @@
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2019 Joyent, Inc.
* Copyright 2022 Oxide Computer Company
*/
@@ -203,9 +203,9 @@ typedef struct { /* kernel syscall set type */
* This value should not be changed in a patch.
*/
#if defined(__sparc)
-#define __KERN_NAUXV_IMPL 20
+#define __KERN_NAUXV_IMPL 24
#elif defined(__i386) || defined(__amd64)
-#define __KERN_NAUXV_IMPL 26
+#define __KERN_NAUXV_IMPL 29
#endif
struct execsw;
@@ -228,7 +228,11 @@ typedef struct user {
char u_psargs[PSARGSZ]; /* arguments from exec */
int u_argc; /* value of argc passed to main() */
uintptr_t u_argv; /* value of argv passed to main() */
+ uintptr_t u_argvstrs; /* argv string space pointer */
+ size_t u_argvstrsize; /* size of argv string space */
uintptr_t u_envp; /* value of envp passed to main() */
+ uintptr_t u_envstrs; /* env string space pointer */
+ size_t u_envstrsize; /* size of env string space */
uintptr_t u_commpagep; /* address of mapped comm page */
/*
diff --git a/usr/src/uts/common/sys/vm.h b/usr/src/uts/common/sys/vm.h
index 14b5754b28..b32a789d36 100644
--- a/usr/src/uts/common/sys/vm.h
+++ b/usr/src/uts/common/sys/vm.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2017 Joyent, Inc.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
@@ -57,6 +58,8 @@ int queue_io_request(struct vnode *, u_offset_t);
extern kmutex_t memavail_lock;
extern kcondvar_t memavail_cv;
+#define WAKE_PAGEOUT_SCANNER() cv_broadcast(&proc_pageout->p_cv)
+
#endif /* defined(_KERNEL) */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/vm_usage.h b/usr/src/uts/common/sys/vm_usage.h
index 1aa4a8ee6d..afbf438eff 100644
--- a/usr/src/uts/common/sys/vm_usage.h
+++ b/usr/src/uts/common/sys/vm_usage.h
@@ -21,6 +21,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright 2017 Joyent, Inc. All rights reserved.
*/
#ifndef _SYS_VM_USAGE_H
@@ -79,8 +80,12 @@ extern "C" {
/* zoneid */
#define VMUSAGE_COL_EUSERS 0x2000 /* same as VMUSAGE_COL_RUSERS, but by */
/* euser */
+#define VMUSAGE_A_ZONE 0x4000 /* rss/swap for a specified zone */
-#define VMUSAGE_MASK 0x3fff /* all valid flags for getvmusage() */
+#define VMUSAGE_MASK 0x7fff /* all valid flags for getvmusage() */
+
+#define VMUSAGE_ZONE_FLAGS (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | \
+ VMUSAGE_A_ZONE)
typedef struct vmusage {
id_t vmu_zoneid; /* zoneid, or ALL_ZONES for */
diff --git a/usr/src/uts/common/sys/vmsystm.h b/usr/src/uts/common/sys/vmsystm.h
index e8e30b7608..daf76f9f51 100644
--- a/usr/src/uts/common/sys/vmsystm.h
+++ b/usr/src/uts/common/sys/vmsystm.h
@@ -19,6 +19,9 @@
* CDDL HEADER END
*/
/*
+ * Copyright (c) 2017, Joyent, Inc. All rights reserved.
+ */
+/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -58,6 +61,9 @@ extern pgcnt_t desscan; /* desired pages scanned per second */
extern pgcnt_t slowscan;
extern pgcnt_t fastscan;
extern pgcnt_t pushes; /* number of pages pushed to swap device */
+extern uint64_t low_mem_scan; /* num times page scan due to low memory */
+extern uint64_t zone_cap_scan; /* num times page scan due to zone cap */
+extern uint64_t n_throttle; /* num times page create throttled */
/* writable copies of tunables */
extern pgcnt_t maxpgio; /* max paging i/o per sec before start swaps */
@@ -160,6 +166,8 @@ extern void *boot_virt_alloc(void *addr, size_t size);
extern size_t exec_get_spslew(void);
+extern caddr_t map_userlimit(proc_t *pp, struct as *as, int flags);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/vnd.h b/usr/src/uts/common/sys/vnd.h
new file mode 100644
index 0000000000..bc7c9c3122
--- /dev/null
+++ b/usr/src/uts/common/sys/vnd.h
@@ -0,0 +1,141 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_VND_H
+#define _SYS_VND_H
+
+#include <sys/types.h>
+#include <sys/vnd_errno.h>
+#include <sys/frameio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * We distinguish between normal ioctls and private ioctls we issues to out
+ * streams version. Streams ioctls have the upper bit set in the lowest byte.
+ * Note that there are no STREAMs ioctls for userland and all definitions
+ * related to them are not present in this file.
+ */
+#define VND_IOC (('v' << 24) | ('n' << 16) | ('d' << 8))
+
+/*
+ * Attach the current minor instance to a given dlpi datalink identified by a
+ * vnd_ioc_name_t argument. This fails if it's already been attached. Note that
+ * unlike the other ioctls, this is passed directly as opposed to every other
+ * function which is passed as a pointer to the value.
+ */
+#define VND_IOC_ATTACH (VND_IOC | 0x1)
+
+#define VND_NAMELEN 32
+
+typedef struct vnd_ioc_attach {
+ char via_name[VND_NAMELEN];
+ zoneid_t via_zoneid;
+ uint32_t via_errno;
+} vnd_ioc_attach_t;
+
+/*
+ * Link the current minor instance into the /devices name space.
+ *
+ * This ioctl adds entries into /devices with a name of the form z%d:%s vil_zid,
+ * vil_name. The device will be namespaced to the zone. The global zone will be
+ * able to see all minor nodes. In the zone, only the /dev entries will exist.
+ * At this time, a given device can only have one link at a time. Note that a
+ * user cannot specify the zone to pass in, rather it is the zone that the
+ * device was attached in.
+ */
+#define VND_IOC_LINK (VND_IOC | 0x2)
+
+typedef struct vnd_ioc_link {
+ char vil_name[VND_NAMELEN];
+ uint32_t vil_errno;
+} vnd_ioc_link_t;
+
+/*
+ * Unlink the opened minor instance from the /devices name space. A zone may use
+ * this to unlink an extent entry in /dev; however, they will not be able to
+ * link it in again.
+ */
+#define VND_IOC_UNLINK (VND_IOC | 0x3)
+typedef struct vnd_ioc_unlink {
+ uint32_t viu_errno;
+} vnd_ioc_unlink_t;
+
+/*
+ * Controls to get and set the current buffer recieve buffer size.
+ */
+typedef struct vnd_ioc_buf {
+ uint64_t vib_size;
+ uint32_t vib_filler;
+ uint32_t vib_errno;
+} vnd_ioc_buf_t;
+
+#define VND_IOC_GETRXBUF (VND_IOC | 0x04)
+#define VND_IOC_SETRXBUF (VND_IOC | 0x05)
+#define VND_IOC_GETMAXBUF (VND_IOC | 0x06)
+#define VND_IOC_GETTXBUF (VND_IOC | 0x07)
+#define VND_IOC_SETTXBUF (VND_IOC | 0x08)
+#define VND_IOC_GETMINTU (VND_IOC | 0x09)
+#define VND_IOC_GETMAXTU (VND_IOC | 0x0a)
+
+/*
+ * Information and listing ioctls
+ *
+ * This gets information about all of the active vnd instances. vl_actents is
+ * always updated to the number around and vl_nents is the number of
+ * vnd_ioc_info_t elements are allocated in vl_ents.
+ */
+typedef struct vnd_ioc_info {
+ uint32_t vii_version;
+ zoneid_t vii_zone;
+ char vii_name[VND_NAMELEN];
+ char vii_datalink[VND_NAMELEN];
+} vnd_ioc_info_t;
+
+typedef struct vnd_ioc_list {
+ uint_t vl_nents;
+ uint_t vl_actents;
+ vnd_ioc_info_t *vl_ents;
+} vnd_ioc_list_t;
+
+#ifdef _KERNEL
+
+typedef struct vnd_ioc_list32 {
+ uint_t vl_nents;
+ uint_t vl_actents;
+ caddr32_t vl_ents;
+} vnd_ioc_list32_t;
+
+#endif /* _KERNEL */
+
+#define VND_IOC_LIST (VND_IOC | 0x20)
+
+/*
+ * Framed I/O ioctls
+ *
+ * Users should use the standard frameio_t as opposed to a vnd specific type.
+ * This is a consolidation private ioctl pending futher stability in the form of
+ * specific system work.
+ */
+#define VND_IOC_FRAMEIO_READ (VND_IOC | 0x30)
+#define VND_IOC_FRAMEIO_WRITE (VND_IOC | 0x31)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VND_H */
diff --git a/usr/src/uts/common/sys/vnd_errno.h b/usr/src/uts/common/sys/vnd_errno.h
new file mode 100644
index 0000000000..89e5fc2543
--- /dev/null
+++ b/usr/src/uts/common/sys/vnd_errno.h
@@ -0,0 +1,72 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright (c) 2014 Joyent, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_VND_ERRNO_H
+#define _SYS_VND_ERRNO_H
+
+/*
+ * This header contains all of the available vnd errors.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum vnd_errno {
+ VND_E_SUCCESS = 0, /* no error */
+ VND_E_NOMEM, /* no memory */
+ VND_E_NODATALINK, /* no such datalink */
+ VND_E_NOTETHER, /* not DL_ETHER */
+ VND_E_DLPIINVAL, /* Unknown DLPI failures */
+ VND_E_ATTACHFAIL, /* DL_ATTACH_REQ failed */
+ VND_E_BINDFAIL, /* DL_BIND_REQ failed */
+ VND_E_PROMISCFAIL, /* DL_PROMISCON_REQ failed */
+ VND_E_DIRECTFAIL, /* DLD_CAPAB_DIRECT enable failed */
+ VND_E_CAPACKINVAL, /* bad dl_capability_ack_t */
+ VND_E_SUBCAPINVAL, /* bad dl_capability_sub_t */
+ VND_E_DLDBADVERS, /* bad dld version */
+ VND_E_KSTATCREATE, /* failed to create kstats */
+ VND_E_NODEV, /* no such vnd link */
+ VND_E_NONETSTACK, /* netstack doesn't exist */
+ VND_E_ASSOCIATED, /* device already associated */
+ VND_E_ATTACHED, /* device already attached */
+ VND_E_LINKED, /* device already linked */
+ VND_E_BADNAME, /* invalid name */
+ VND_E_PERM, /* can't touch this */
+ VND_E_NOZONE, /* no such zone */
+ VND_E_STRINIT, /* failed to initialize vnd stream module */
+ VND_E_NOTATTACHED, /* device not attached */
+ VND_E_NOTLINKED, /* device not linked */
+ VND_E_LINKEXISTS, /* another device has the same link name */
+ VND_E_MINORNODE, /* failed to create minor node */
+ VND_E_BUFTOOBIG, /* requested buffer size is too large */
+ VND_E_BUFTOOSMALL, /* requested buffer size is too small */
+ VND_E_DLEXCL, /* unable to get dlpi excl access */
+ VND_E_DIRECTNOTSUP,
+ /* DLD direct capability not suported over data link */
+ VND_E_BADPROPSIZE, /* invalid property size */
+ VND_E_BADPROP, /* invalid property */
+ VND_E_PROPRDONLY, /* property is read only */
+ VND_E_SYS, /* unexpected system error */
+ VND_E_CAPABPASS,
+ /* capabilities invalid, pass-through module detected */
+ VND_E_UNKNOWN /* unknown error */
+} vnd_errno_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VND_ERRNO_H */
diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h
index b8702bc8f5..df5da6c2e7 100644
--- a/usr/src/uts/common/sys/vnode.h
+++ b/usr/src/uts/common/sys/vnode.h
@@ -21,7 +21,7 @@
/*
* Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2018, Joyent, Inc.
+ * Copyright 2020 Joyent, Inc.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2017 RackTop Systems.
*/
@@ -197,6 +197,7 @@ struct vsd_node {
* v_count
* v_shrlocks
* v_path
+ * v_phantom_count
* v_vsd
* v_xattrdir
*
@@ -214,6 +215,7 @@ struct vsd_node {
* v_lock
* v_flag
* v_count
+ * v_phantom_count
* v_data
* v_vfsp
* v_stream
@@ -285,6 +287,8 @@ typedef struct vnode {
kmutex_t v_lock; /* protects vnode fields */
uint_t v_flag; /* vnode flags (see below) */
uint_t v_count; /* reference count */
+ /* non vn_count() ref count (see below) */
+ uint_t v_phantom_count;
void *v_data; /* private data for fs */
struct vfs *v_vfsp; /* ptr to containing VFS */
struct stdata *v_stream; /* associated stream */
@@ -811,13 +815,15 @@ typedef enum vnevent {
VE_REMOVE = 3, /* Remove of vnode's name */
VE_RMDIR = 4, /* Remove of directory vnode's name */
VE_CREATE = 5, /* Create with vnode's name which exists */
- VE_LINK = 6, /* Link with vnode's name as source */
- VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */
- VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */
+ VE_LINK = 6, /* Link with vnode's name as source */
+ VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */
+ VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */
VE_TRUNCATE = 9, /* Truncate */
VE_PRE_RENAME_SRC = 10, /* Pre-rename, with vnode as source */
VE_PRE_RENAME_DEST = 11, /* Pre-rename, with vnode as target/dest. */
- VE_PRE_RENAME_DEST_DIR = 12 /* Pre-rename with vnode as target dir */
+ VE_PRE_RENAME_DEST_DIR = 12, /* Pre-rename with vnode as target dir */
+ VE_RENAME_SRC_DIR = 13, /* Rename with vnode as source dir */
+ VE_RESIZE = 14 /* Resize/truncate to non-zero offset */
} vnevent_t;
/*
@@ -1292,9 +1298,9 @@ void vn_recycle(vnode_t *);
void vn_free(vnode_t *);
int vn_is_readonly(vnode_t *);
-int vn_is_opened(vnode_t *, v_mode_t);
-int vn_is_mapped(vnode_t *, v_mode_t);
-int vn_has_other_opens(vnode_t *, v_mode_t);
+int vn_is_opened(vnode_t *, v_mode_t);
+int vn_is_mapped(vnode_t *, v_mode_t);
+int vn_has_other_opens(vnode_t *, v_mode_t);
void vn_open_upgrade(vnode_t *, int);
void vn_open_downgrade(vnode_t *, int);
@@ -1333,10 +1339,12 @@ int vn_createat(char *pnamep, enum uio_seg seg, struct vattr *vap,
int vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, ssize_t len,
offset_t offset, enum uio_seg seg, int ioflag, rlim64_t ulimit,
cred_t *cr, ssize_t *residp);
+uint_t vn_count(struct vnode *vp);
void vn_rele(struct vnode *vp);
void vn_rele_async(struct vnode *vp, struct taskq *taskq);
void vn_rele_dnlc(struct vnode *vp);
void vn_rele_stream(struct vnode *vp);
+void vn_phantom_rele(struct vnode *vp);
int vn_link(char *from, char *to, enum uio_seg seg);
int vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
vnode_t *tstartvp, char *to, enum uio_seg seg);
@@ -1377,7 +1385,8 @@ void vnevent_remove(vnode_t *, vnode_t *, char *, caller_context_t *);
void vnevent_rmdir(vnode_t *, vnode_t *, char *, caller_context_t *);
void vnevent_create(vnode_t *, caller_context_t *);
void vnevent_link(vnode_t *, caller_context_t *);
-void vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct);
+void vnevent_rename_dest_dir(vnode_t *, vnode_t *, char *,
+ caller_context_t *ct);
void vnevent_mountedover(vnode_t *, caller_context_t *);
void vnevent_truncate(vnode_t *, caller_context_t *);
int vnevent_support(vnode_t *, caller_context_t *);
@@ -1387,6 +1396,7 @@ void vnevent_pre_rename_dest(vnode_t *, vnode_t *, char *,
caller_context_t *);
void vnevent_pre_rename_dest_dir(vnode_t *, vnode_t *, char *,
caller_context_t *);
+void vnevent_resize(vnode_t *, caller_context_t *);
/* Vnode specific data */
void vsd_create(uint_t *, void (*)(void *));
@@ -1439,6 +1449,16 @@ extern uint_t pvn_vmodsort_supported;
* this->vp->v_path == NULL ? "NULL" : stringof(this->vp->v_path),
* this->vp->v_count)
* }'
+ *
+ * There are some situations where we don't want a hold to make the vnode
+ * 'busy'. For example, watching a directory via port events or inotify
+ * should not prevent a filesystem from mounting on a watched directory.
+ * For those instances, a phantom hold is used via VN_PHANTOM_HOLD().
+ *
+ * A phantom hold works identically to regular hold, except that those holds
+ * are excluded from the return value of vn_count().
+ *
+ * A phantom hold must be released by VN_PHANTOM_RELE().
*/
#define VN_HOLD_LOCKED(vp) { \
ASSERT(mutex_owned(&(vp)->v_lock)); \
@@ -1467,6 +1487,22 @@ extern uint_t pvn_vmodsort_supported;
DTRACE_PROBE1(vn__rele, vnode_t *, vp); \
}
+#define VN_PHANTOM_HOLD_LOCKED(vp) { \
+ VN_HOLD_LOCKED(vp); \
+ (vp)->v_phantom_count++; \
+ DTRACE_PROBE1(vn__phantom_hold, vnode_t *, vp); \
+}
+
+#define VN_PHANTOM_HOLD(vp) { \
+ mutex_enter(&(vp)->v_lock); \
+ VN_PHANTOM_HOLD_LOCKED(vp); \
+ mutex_exit(&(vp)->v_lock); \
+}
+
+#define VN_PHANTOM_RELE(vp) { \
+ vn_phantom_rele(vp); \
+}
+
#define VN_SET_VFS_TYPE_DEV(vp, vfsp, type, dev) { \
(vp)->v_vfsp = (vfsp); \
(vp)->v_type = (type); \
@@ -1477,7 +1513,7 @@ extern uint_t pvn_vmodsort_supported;
* Compare two vnodes for equality. In general this macro should be used
* in preference to calling VOP_CMP directly.
*/
-#define VN_CMP(VP1, VP2) ((VP1) == (VP2) ? 1 : \
+#define VN_CMP(VP1, VP2) ((VP1) == (VP2) ? 1 : \
((VP1) && (VP2) && (vn_getops(VP1) == vn_getops(VP2)) ? \
VOP_CMP(VP1, VP2, NULL) : 0))
diff --git a/usr/src/uts/common/sys/zfd.h b/usr/src/uts/common/sys/zfd.h
new file mode 100644
index 0000000000..e08d75ecba
--- /dev/null
+++ b/usr/src/uts/common/sys/zfd.h
@@ -0,0 +1,78 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef _SYS_ZFD_H
+#define _SYS_ZFD_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Minor node name of the global zone side (often called the "master" side)
+ * of the zfd dev.
+ */
+#define ZFD_MASTER_NAME "master"
+
+/*
+ * Minor node name of the non-global zone side (often called the "slave"
+ * side) of the zfd dev.
+ */
+#define ZFD_SLAVE_NAME "slave"
+
+#define ZFD_NAME_LEN 16
+
+/*
+ * ZFD_IOC forms the base for all zfd ioctls.
+ */
+#define ZFD_IOC (('Z' << 24) | ('f' << 16) | ('d' << 8))
+
+/*
+ * This ioctl tells the slave side it should push the TTY stream modules
+ * so that the fd looks like a tty.
+ */
+#define ZFD_MAKETTY (ZFD_IOC | 0)
+
+/*
+ * This ioctl puts a hangup into the stream so that the slave side sees EOF.
+ */
+#define ZFD_EOF (ZFD_IOC | 1)
+
+/*
+ * This ioctl succeeds if the slave side is open.
+ */
+#define ZFD_HAS_SLAVE (ZFD_IOC | 2)
+
+/*
+ * This ioctl links two streams into a multiplexer configuration for in-zone
+ * logging.
+ */
+#define ZFD_MUX (ZFD_IOC | 3)
+
+/*
+ * This ioctl controls the flow control setting for the log multiplexer stream
+ * (1 = true, 0 = false). The default is false which implies teeing into the
+ * log stream is "best-effort" but data will be discarded if the stream
+ * becomes full. If set and the log stream begins to fill up, the primary
+ * stream will stop flowing.
+ */
+#define ZFD_MUX_FLOWCON (ZFD_IOC | 4)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_ZFD_H */
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 26b74ca34a..afef75013f 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -50,8 +50,10 @@
#include <sys/socket_impl.h>
#include <sys/secflags.h>
#include <sys/cpu_uarray.h>
+#include <sys/nvpair.h>
#include <sys/list.h>
#include <sys/loadavg.h>
+#include <sys/vnode.h>
#endif /* _KERNEL */
#ifdef __cplusplus
@@ -62,15 +64,27 @@ extern "C" {
* NOTE
*
* The contents of this file are private to the implementation of
- * Solaris and are subject to change at any time without notice.
+ * illumos and are subject to change at any time without notice.
* Applications and drivers using these interfaces may fail to
* run on future releases.
*/
/* Available both in kernel and for user space */
-/* zone id restrictions and special ids */
-#define MAX_ZONEID 9999
+/*
+ * zone id restrictions and special ids.
+ * See 'maxzones' for run-time zone limit.
+ *
+ * The current 8k value for MAX_ZONES was originally derived from the virtual
+ * interface limit in IP when "shared-stack" was the only supported networking
+ * for zones. The virtual interface limit is the number of addresses allowed
+ * on an interface (see MAX_ADDRS_PER_IF). Even with exclusive stacks, an 8k
+ * zone limit is still a reasonable choice at this time, given other limits
+ * within the kernel. Since we only support 8192 zones (which includes GZ),
+ * there is no point in allowing MAX_ZONEID > 8k.
+ */
+#define MAX_ZONES 8192
+#define MAX_ZONEID (MAX_ZONES - 1)
#define MIN_USERZONEID 1 /* lowest user-creatable zone ID */
#define MIN_ZONEID 0 /* minimum zone ID on system */
#define GLOBAL_ZONEID 0
@@ -97,7 +111,13 @@ extern "C" {
#define ZONE_CHECK_DATALINK 12
#define ZONE_LIST_DATALINK 13
-/* zone attributes */
+/*
+ * zone attributes
+ *
+ * Note that values up to ZONE_ATTR_HOSTID are baked into things like Solaris
+ * 10 which can be run under the s10 brand; don't renumber or change them. Ones
+ * which are no longer used are commented out.
+ */
#define ZONE_ATTR_ROOT 1
#define ZONE_ATTR_NAME 2
#define ZONE_ATTR_STATUS 3
@@ -109,17 +129,24 @@ extern "C" {
#define ZONE_ATTR_INITNAME 9
#define ZONE_ATTR_BOOTARGS 10
#define ZONE_ATTR_BRAND 11
-#define ZONE_ATTR_PHYS_MCAP 12
+/* #define ZONE_ATTR_PHYS_MCAP 12 */
#define ZONE_ATTR_SCHED_CLASS 13
#define ZONE_ATTR_FLAGS 14
#define ZONE_ATTR_HOSTID 15
#define ZONE_ATTR_FS_ALLOWED 16
#define ZONE_ATTR_NETWORK 17
+
+/* illumos extensions */
#define ZONE_ATTR_INITNORESTART 20
#define ZONE_ATTR_SECFLAGS 21
#define ZONE_ATTR_INITRESTART0 22
#define ZONE_ATTR_INITREBOOT 23
+/* OmniOS/SmartOS extensions */
+#define ZONE_ATTR_DID 30
+#define ZONE_ATTR_APP_SVC_CT 31
+#define ZONE_ATTR_SCHED_FIXEDHI 32
+
/* Start of the brand-specific attribute namespace */
#define ZONE_ATTR_BRAND_ATTRS 32768
@@ -134,13 +161,18 @@ extern "C" {
#define ZONE_EVENT_READY "ready"
#define ZONE_EVENT_RUNNING "running"
#define ZONE_EVENT_SHUTTING_DOWN "shutting_down"
+#define ZONE_EVENT_FREE "free"
#define ZONE_CB_NAME "zonename"
#define ZONE_CB_NEWSTATE "newstate"
#define ZONE_CB_OLDSTATE "oldstate"
+#define ZONE_CB_RESTARTS "restarts"
#define ZONE_CB_TIMESTAMP "when"
#define ZONE_CB_ZONEID "zoneid"
+#define ZONE_EVENT_INIT_CLASS "init"
+#define ZONE_EVENT_INIT_RESTART_SC "restart"
+
/*
* Exit values that may be returned by scripts or programs invoked by various
* zone commands.
@@ -199,6 +231,7 @@ typedef struct {
uint32_t doi; /* DOI for label */
caddr32_t label; /* label associated with zone */
int flags;
+ zoneid_t zoneid; /* requested zoneid */
} zone_def32;
#endif
typedef struct {
@@ -215,6 +248,7 @@ typedef struct {
uint32_t doi; /* DOI for label */
const bslabel_t *label; /* label associated with zone */
int flags;
+ zoneid_t zoneid; /* requested zoneid */
} zone_def;
/* extended error information */
@@ -239,7 +273,8 @@ typedef enum {
ZONE_IS_EMPTY,
ZONE_IS_DOWN,
ZONE_IS_DYING,
- ZONE_IS_DEAD
+ ZONE_IS_DEAD,
+ ZONE_IS_FREE /* transient state for zone sysevent */
} zone_status_t;
#define ZONE_MIN_STATE ZONE_IS_UNINITIALIZED
#define ZONE_MAX_STATE ZONE_IS_DEAD
@@ -259,9 +294,12 @@ typedef enum zone_cmd {
typedef struct zone_cmd_arg {
uint64_t uniqid; /* unique "generation number" */
zone_cmd_t cmd; /* requested action */
- uint32_t _pad; /* need consistent 32/64 bit alignmt */
+ int status; /* init status on shutdown */
+ uint32_t debug; /* enable brand hook debug */
char locale[MAXPATHLEN]; /* locale in which to render messages */
char bootbuf[BOOTARGS_MAX]; /* arguments passed to zone_boot() */
+ /* Needed for 32/64 zoneadm -> zoneadmd door arg size check. */
+ int pad;
} zone_cmd_arg_t;
/*
@@ -389,7 +427,7 @@ typedef struct zone_dataset {
} zone_dataset_t;
/*
- * structure for zone kstats
+ * structure for rctl zone kstats
*/
typedef struct zone_kstat {
kstat_named_t zk_zonename;
@@ -400,12 +438,57 @@ typedef struct zone_kstat {
struct cpucap;
typedef struct {
+ hrtime_t cycle_start;
+ uint_t cycle_cnt;
+ hrtime_t zone_avg_cnt;
+} sys_zio_cntr_t;
+
+typedef struct {
+ kstat_named_t zv_zonename;
+ kstat_named_t zv_nread;
+ kstat_named_t zv_reads;
+ kstat_named_t zv_rtime;
+ kstat_named_t zv_rlentime;
+ kstat_named_t zv_rcnt;
+ kstat_named_t zv_nwritten;
+ kstat_named_t zv_writes;
+ kstat_named_t zv_wtime;
+ kstat_named_t zv_wlentime;
+ kstat_named_t zv_wcnt;
+ kstat_named_t zv_10ms_ops;
+ kstat_named_t zv_100ms_ops;
+ kstat_named_t zv_1s_ops;
+ kstat_named_t zv_10s_ops;
+ kstat_named_t zv_delay_cnt;
+ kstat_named_t zv_delay_time;
+} zone_vfs_kstat_t;
+
+typedef struct {
+ kstat_named_t zz_zonename;
+ kstat_named_t zz_nread;
+ kstat_named_t zz_reads;
+ kstat_named_t zz_rtime;
+ kstat_named_t zz_rlentime;
+ kstat_named_t zz_nwritten;
+ kstat_named_t zz_writes;
+ kstat_named_t zz_waittime;
+} zone_zfs_kstat_t;
+
+typedef struct {
kstat_named_t zm_zonename;
+ kstat_named_t zm_rss;
+ kstat_named_t zm_phys_cap;
+ kstat_named_t zm_swap;
+ kstat_named_t zm_swap_cap;
+ kstat_named_t zm_nover;
+ kstat_named_t zm_pagedout;
kstat_named_t zm_pgpgin;
kstat_named_t zm_anonpgin;
kstat_named_t zm_execpgin;
kstat_named_t zm_fspgin;
kstat_named_t zm_anon_alloc_fail;
+ kstat_named_t zm_pf_throttle;
+ kstat_named_t zm_pf_throttle_usec;
} zone_mcap_kstat_t;
typedef struct {
@@ -420,8 +503,10 @@ typedef struct {
kstat_named_t zm_ffnoproc;
kstat_named_t zm_ffnomem;
kstat_named_t zm_ffmisc;
+ kstat_named_t zm_mfseglim;
kstat_named_t zm_nested_intp;
kstat_named_t zm_init_pid;
+ kstat_named_t zm_init_restarts;
kstat_named_t zm_boot_time;
} zone_misc_kstat_t;
@@ -464,6 +549,7 @@ typedef struct zone {
*/
list_node_t zone_linkage;
zoneid_t zone_id; /* ID of zone */
+ zoneid_t zone_did; /* persistent debug ID of zone */
uint_t zone_ref; /* count of zone_hold()s on zone */
uint_t zone_cred_ref; /* count of zone_hold_cred()s on zone */
/*
@@ -516,10 +602,11 @@ typedef struct zone {
kcondvar_t zone_cv; /* used to signal state changes */
struct proc *zone_zsched; /* Dummy kernel "zsched" process */
pid_t zone_proc_initpid; /* pid of "init" for this zone */
- char *zone_initname; /* fs path to 'init' */
+ uint_t zone_proc_init_restarts; /* times init restarted */
+ char *zone_initname; /* fs path to 'init' */
+ int zone_init_status; /* init's exit status */
int zone_boot_err; /* for zone_boot() if boot fails */
char *zone_bootargs; /* arguments passed via zone_boot() */
- uint64_t zone_phys_mcap; /* physical memory cap */
/*
* zone_kthreads is protected by zone_status_lock.
*/
@@ -559,9 +646,11 @@ typedef struct zone {
boolean_t zone_restart_init; /* Restart init if it dies? */
boolean_t zone_reboot_on_init_exit; /* Reboot if init dies? */
boolean_t zone_restart_init_0; /* Restart only if it exits 0 */
+ boolean_t zone_setup_app_contract; /* setup contract? */
struct brand *zone_brand; /* zone's brand */
void *zone_brand_data; /* store brand specific data */
id_t zone_defaultcid; /* dflt scheduling class id */
+ boolean_t zone_fixed_hipri; /* fixed sched. hi prio */
kstat_t *zone_swapresv_kstat;
kstat_t *zone_lockedmem_kstat;
/*
@@ -570,8 +659,24 @@ typedef struct zone {
list_t zone_dl_list;
netstack_t *zone_netstack;
struct cpucap *zone_cpucap; /* CPU caps data */
+
/*
- * Solaris Auditing per-zone audit context
+ * kstats and counters for VFS ops and bytes.
+ */
+ kmutex_t zone_vfs_lock; /* protects VFS statistics */
+ kstat_t *zone_vfs_ksp;
+ kstat_io_t zone_vfs_rwstats;
+ zone_vfs_kstat_t *zone_vfs_stats;
+
+ /*
+ * kstats for ZFS I/O ops and bytes.
+ */
+ kmutex_t zone_zfs_lock; /* protects ZFS statistics */
+ kstat_t *zone_zfs_ksp;
+ zone_zfs_kstat_t *zone_zfs_stats;
+
+ /*
+ * illumos Auditing per-zone audit context
*/
struct au_kcontext *zone_audit_kctxt;
/*
@@ -588,7 +693,11 @@ typedef struct zone {
/* zone_rctls->rcs_lock */
kstat_t *zone_nprocs_kstat;
- kmutex_t zone_mcap_lock; /* protects mcap statistics */
+ /*
+ * kstats and counters for physical memory capping.
+ */
+ kstat_t *zone_physmem_kstat;
+ kmutex_t zone_mcap_lock; /* protects mcap statistics */
kstat_t *zone_mcap_ksp;
zone_mcap_kstat_t *zone_mcap_stats;
uint64_t zone_pgpgin; /* pages paged in */
@@ -613,6 +722,8 @@ typedef struct zone {
uint32_t zone_ffnomem; /* as_dup/memory error */
uint32_t zone_ffmisc; /* misc. other error */
+ uint32_t zone_mfseglim; /* map failure (# segs limit) */
+
uint32_t zone_nested_intp; /* nested interp. kstat */
struct loadavg_s zone_loadavg; /* loadavg for this zone */
@@ -640,6 +751,53 @@ typedef struct zone {
} zone_t;
/*
+ * Data and counters used for ZFS fair-share disk IO.
+ */
+typedef struct zone_zfs_io {
+ uint16_t zpers_zfs_io_pri; /* ZFS IO priority - 16k max */
+ uint_t zpers_zfs_queued[2]; /* sync I/O enqueued count */
+ sys_zio_cntr_t zpers_rd_ops; /* Counters for ZFS reads, */
+ sys_zio_cntr_t zpers_wr_ops; /* writes, and */
+ sys_zio_cntr_t zpers_lwr_ops; /* logical writes. */
+ kstat_io_t zpers_zfs_rwstats;
+ uint64_t zpers_io_util; /* IO utilization metric */
+ uint64_t zpers_zfs_rd_waittime;
+ uint8_t zpers_io_delay; /* IO delay on logical r/w */
+ uint8_t zpers_zfs_weight; /* used to prevent starvation */
+ uint8_t zpers_io_util_above_avg; /* IO util percent > avg. */
+} zone_zfs_io_t;
+
+/*
+ * "Persistent" zone data which can be accessed idependently of the zone_t.
+ */
+typedef struct zone_persist {
+ kmutex_t zpers_zfs_lock; /* Protects zpers_zfsp references */
+ zone_zfs_io_t *zpers_zfsp; /* ZFS fair-share IO data */
+ uint8_t zpers_over; /* currently over cap */
+ uint32_t zpers_pg_cnt; /* current RSS in pages */
+ uint32_t zpers_pg_limit; /* current RRS limit in pages */
+ uint32_t zpers_nover; /* # of times over phys. cap */
+#ifndef DEBUG
+ uint64_t zpers_pg_out; /* # pages flushed */
+#else
+ /*
+ * To conserve memory, some detailed kstats are only kept for DEBUG
+ * builds.
+ */
+ uint64_t zpers_zfs_rd_waittime;
+
+ uint64_t zpers_pg_anon; /* # clean anon pages flushed */
+ uint64_t zpers_pg_anondirty; /* # dirty anon pages flushed */
+ uint64_t zpers_pg_fs; /* # clean fs pages flushed */
+ uint64_t zpers_pg_fsdirty; /* # dirty fs pages flushed */
+#endif
+} zone_persist_t;
+
+typedef enum zone_pageout_op {
+ ZPO_DIRTY, ZPO_FS, ZPO_ANON, ZPO_ANONDIRTY
+} zone_pageout_op_t;
+
+/*
* Special value of zone_psetid to indicate that pools are disabled.
*/
#define ZONE_PS_INVAL PS_MYID
@@ -668,6 +826,7 @@ extern zone_t *zone_find_by_name(char *);
extern zone_t *zone_find_by_any_path(const char *, boolean_t);
extern zone_t *zone_find_by_path(const char *);
extern zoneid_t getzoneid(void);
+extern zoneid_t getzonedid(void);
extern zone_t *zone_find_by_id_nolock(zoneid_t);
extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *);
extern int zone_check_datalink(zoneid_t *, datalink_id_t);
@@ -802,7 +961,7 @@ struct zsd_entry {
* NOTE: Using the VN_ prefix, even though it's defined here in zone.h.
* NOTE2: See above warning about ZONE_ROOTVP().
*/
-#define VN_IS_CURZONEROOT(vp) (VN_CMP(vp, ZONE_ROOTVP()))
+#define VN_IS_CURZONEROOT(vp) (VN_CMP(vp, ZONE_ROOTVP()))
/*
* Zone-safe version of thread_create() to be used when the caller wants to
@@ -868,6 +1027,7 @@ extern int zone_ncpus_online_get(zone_t *);
* Returns true if the named pool/dataset is visible in the current zone.
*/
extern int zone_dataset_visible(const char *, int *);
+extern int zone_dataset_visible_inzone(zone_t *, const char *, int *);
/*
* zone version of kadmin()
@@ -880,10 +1040,25 @@ extern void mount_completed(zone_t *);
extern int zone_walk(int (*)(zone_t *, void *), void *);
+struct page;
+extern void zone_add_page(struct page *);
+extern void zone_rm_page(struct page *);
+extern void zone_pageout_stat(int, zone_pageout_op_t);
+extern void zone_get_physmem_data(int, pgcnt_t *, pgcnt_t *);
+
+/* Interfaces for page scanning */
+extern uint_t zone_num_over_cap;
+extern zone_persist_t zone_pdata[MAX_ZONES];
+
extern rctl_hndl_t rc_zone_locked_mem;
extern rctl_hndl_t rc_zone_max_swap;
+extern rctl_hndl_t rc_zone_phys_mem;
extern rctl_hndl_t rc_zone_max_lofi;
+/* For publishing sysevents related to a particular zone */
+extern void zone_sysevent_publish(zone_t *, const char *, const char *,
+ nvlist_t *);
+
#endif /* _KERNEL */
#ifdef __cplusplus