diff options
Diffstat (limited to 'usr/src/uts/common/sys')
88 files changed, 3052 insertions, 156 deletions
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index ed02832500..779e388f83 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -22,6 +22,7 @@ # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2014, Joyent, Inc. All rights reserved. # Copyright 2013 Garrett D'Amore <garrett@damore.org> +# Copyright 2015, Joyent, Inc. All rights reserved. # Copyright 2013 Saso Kiselkov. All rights reserved. # Copyright 2015 Nexenta Systems, Inc. All rights reserved. # Copyright 2015 Igor Kozhukhov <ikozhukhov@gmail.com> @@ -249,6 +250,7 @@ CHKHDRS= \ flock.h \ flock_impl.h \ fork.h \ + frameio.h \ fss.h \ fsspriocntl.h \ fsid.h \ @@ -274,6 +276,7 @@ CHKHDRS= \ idmap.h \ ieeefp.h \ id_space.h \ + inotify.h \ instance.h \ int_const.h \ int_fmtio.h \ @@ -342,6 +345,7 @@ CHKHDRS= \ lgrp.h \ lgrp_user.h \ libc_kernel.h \ + limits.h \ link.h \ list.h \ list_impl.h \ @@ -422,6 +426,9 @@ CHKHDRS= \ ontrap.h \ open.h \ openpromio.h \ + overlay.h \ + overlay_common.h \ + overlay_target.h \ panic.h \ param.h \ pathconf.h \ @@ -509,6 +516,7 @@ CHKHDRS= \ sid.h \ siginfo.h \ signal.h \ + signalfd.h \ skein.h \ sleepq.h \ smbios.h \ @@ -643,6 +651,8 @@ CHKHDRS= \ vmem.h \ vmem_impl.h \ vmsystm.h \ + vnd.h \ + vnd_errno.h \ vnic.h \ vnic_impl.h \ vnode.h \ @@ -654,12 +664,14 @@ CHKHDRS= \ vuid_queue.h \ vuid_state.h \ vuid_store.h \ + vxlan.h \ wait.h \ waitq.h \ wanboot_impl.h \ watchpoint.h \ winlockio.h \ zcons.h \ + zfd.h \ zone.h \ xti_inet.h \ xti_osi.h \ @@ -853,13 +865,14 @@ FSHDRS= \ autofs.h \ decomp.h \ dv_node.h \ - sdev_impl.h \ fifonode.h \ hsfs_isospec.h \ hsfs_node.h \ hsfs_rrip.h \ hsfs_spec.h \ hsfs_susp.h \ + hyprlofs.h \ + hyprlofs_info.h \ lofs_info.h \ lofs_node.h \ mntdata.h \ @@ -869,6 +882,8 @@ FSHDRS= \ pc_label.h \ pc_node.h \ pxfs_ki.h \ + sdev_impl.h \ + sdev_plugin.h \ snode.h \ swapnode.h \ tmp.h \ @@ -992,6 +1007,7 @@ SATAGENHDRS= \ SYSEVENTHDRS= \ ap_driver.h \ + datalink.h \ dev.h \ domain.h \ dr.h \ diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h index 547c9cc241..a4c0409304 100644 --- a/usr/src/uts/common/sys/aggr_impl.h +++ b/usr/src/uts/common/sys/aggr_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. */ #ifndef _SYS_AGGR_IMPL_H @@ -308,6 +309,8 @@ extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *); extern void aggr_port_init_callbacks(aggr_port_t *); extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t); +extern void aggr_recv_promisc_cb(void *, mac_resource_handle_t, mblk_t *, + boolean_t); extern void aggr_tx_ring_update(void *, uintptr_t); extern void aggr_tx_notify_thread(void *); diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h index 3a2e705850..e875cd10d9 100644 --- a/usr/src/uts/common/sys/auxv.h +++ b/usr/src/uts/common/sys/auxv.h @@ -29,7 +29,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2014, Joyent, Inc. All rights reserved. */ #ifndef _SYS_AUXV_H @@ -78,6 +78,9 @@ typedef struct { #define AT_FLAGS 8 /* processor flags */ #define AT_ENTRY 9 /* a.out entry point */ +/* First introduced on Linux */ +#define AT_RANDOM 25 /* address of 16 random bytes */ + /* * These relate to the original PPC ABI document; Linux reused * the values for other things (see below), so disambiguation of @@ -110,6 +113,16 @@ typedef struct { * AT_UCACHEBSIZE 21 (moved from 12) * * AT_IGNOREPPC 22 + * + * On Linux: + * AT_* values 18 through 22 are reserved + * AT_SECURE 23 secure mode boolean + * AT_BASE_PLATFORM 24 string identifying real platform, may + * differ from AT_PLATFORM. + * AT_HWCAP2 26 extension of AT_HWCAP + * AT_EXECFN 31 filename of program + * AT_SYSINFO 32 + * AT_SYSINFO_EHDR 33 The vDSO location */ /* @@ -186,6 +199,8 @@ extern uint_t getisax(uint32_t *, uint_t); #define AT_SUN_BRAND_AUX1 2020 #define AT_SUN_BRAND_AUX2 2021 #define AT_SUN_BRAND_AUX3 2022 +#define AT_SUN_BRAND_AUX4 2025 +#define AT_SUN_BRAND_NROOT 2024 /* * Note that 2023 is reserved for the AT_SUN_HWCAP2 word defined above. diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h index badc3faff8..f5a46182b9 100644 --- a/usr/src/uts/common/sys/brand.h +++ b/usr/src/uts/common/sys/brand.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_BRAND_H @@ -102,29 +103,99 @@ struct brand_mach_ops; struct intpdata; struct execa; +/* + * Common structure to define hooks for brand operation. + * + * Required Fields: + * b_init_brand_data - Setup zone brand data during zone_setbrand + * b_free_brand_data - Free zone brand data during zone_destroy + * b_brandsys - Syscall handler for brandsys + * b_setbrand - Initialize process brand data + * b_getattr - Get brand-custom zone attribute + * b_setattr - Set brand-custom zone attribute + * b_copy_procdata - Copy process brand data during fork + * b_proc_exit - Perform process brand exit processing + * b_exec - Reset branded process state on exec + * b_lwp_setrval - Set return code for forked child + * b_initlwp - Initialize lwp brand data (cannot drop p->p_lock) + * b_forklwp - Copy lwp brand data during fork + * b_freelwp - Free lwp brand data + * b_lwpexit - Perform lwp-specific brand exit processing + * b_elfexec - Load and execute ELF binary + * b_sigset_native_to_brand - Convert sigset native->brand + * b_sigset_brand_to_native - Convert sigset brand->native + * b_nsig - Maxiumum signal number + * b_sendsig - Update process state after sendsig + * + * Optional Fields: + * b_lwpdata_alloc - Speculatively allocate data for use in b_initlwp + * b_lwpdata_free - Free data from allocated by b_lwpdata_alloc if errors occur + * during lwp creation before b_initlwp could be called. + * b_initlwp_post - Complete lwp branding (can temporarily drop p->p_lock) + * b_exit_with_sig - Instead of sending SIGCLD, exit with custom behavior + * b_psig_to_proc - Custom additional behavior during psig + * b_wait_filter - Filter processes from being matched by waitid + * b_native_exec - Provide interpreter path prefix for executables + * b_ptrace_exectrap - Custom behavior for legacy ptrace traps + * b_map32limit - Specify alternate limit for MAP_32BIT mappings + * b_stop_notify - Hook process stop events + * b_waitid_helper - Generate synthetic results for waitid + * b_sigcld_repost - Post synthetic SIGCLD signals + * b_issig_stop - Alter/suppress signal delivery during issig + * b_sig_ignorable - Disallow discarding of signals + * b_savecontext - Alter context during savecontext + * b_restorecontext - Alter context during restorecontext + * b_sendsig_stack - Override stack used for signal delivery + * b_setid_clear - Override setid_clear behavior + * b_pagefault - Trap pagefault events + */ struct brand_ops { void (*b_init_brand_data)(zone_t *); void (*b_free_brand_data)(zone_t *); int (*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t); + uintptr_t, uintptr_t); void (*b_setbrand)(struct proc *); int (*b_getattr)(zone_t *, int, void *, size_t *); int (*b_setattr)(zone_t *, int, void *, size_t); void (*b_copy_procdata)(struct proc *, struct proc *); - void (*b_proc_exit)(struct proc *, klwp_t *); + void (*b_proc_exit)(struct proc *); void (*b_exec)(); void (*b_lwp_setrval)(klwp_t *, int, int); - int (*b_initlwp)(klwp_t *); + void *(*b_lwpdata_alloc)(struct proc *); + void (*b_lwpdata_free)(void *); + void (*b_initlwp)(klwp_t *, void *); + void (*b_initlwp_post)(klwp_t *); void (*b_forklwp)(klwp_t *, klwp_t *); void (*b_freelwp)(klwp_t *); void (*b_lwpexit)(klwp_t *); int (*b_elfexec)(struct vnode *vp, struct execa *uap, struct uarg *args, struct intpdata *idata, int level, long *execsz, int setid, caddr_t exec_file, - struct cred *cred, int brand_action); + struct cred *cred, int *brand_action); void (*b_sigset_native_to_brand)(sigset_t *); void (*b_sigset_brand_to_native)(sigset_t *); + void (*b_sigfd_translate)(k_siginfo_t *); int b_nsig; + void (*b_exit_with_sig)(proc_t *, sigqueue_t *); + boolean_t (*b_wait_filter)(proc_t *, proc_t *); + boolean_t (*b_native_exec)(uint8_t, const char **); + uint32_t (*b_map32limit)(proc_t *); + void (*b_stop_notify)(proc_t *, klwp_t *, ushort_t, ushort_t); + int (*b_waitid_helper)(idtype_t, id_t, k_siginfo_t *, int, + boolean_t *, int *); + int (*b_sigcld_repost)(proc_t *, sigqueue_t *); + int (*b_issig_stop)(proc_t *, klwp_t *); + boolean_t (*b_sig_ignorable)(proc_t *, klwp_t *, int); + void (*b_savecontext)(ucontext_t *); +#if defined(_SYSCALL32_IMPL) + void (*b_savecontext32)(ucontext32_t *); +#endif + void (*b_restorecontext)(ucontext_t *); + caddr_t (*b_sendsig_stack)(int); + void (*b_sendsig)(int); + int (*b_setid_clear)(vattr_t *vap, cred_t *cr); + int (*b_pagefault)(proc_t *, klwp_t *, caddr_t, enum fault_type, + enum seg_rw); }; /* @@ -135,6 +206,7 @@ typedef struct brand { char *b_name; struct brand_ops *b_ops; struct brand_mach_ops *b_machops; + size_t b_data_size; } brand_t; extern brand_t native_brand; @@ -165,7 +237,7 @@ extern brand_t *brand_register_zone(struct brand_attr *); extern brand_t *brand_find_name(char *); extern void brand_unregister_zone(brand_t *); extern int brand_zone_count(brand_t *); -extern void brand_setbrand(proc_t *); +extern int brand_setbrand(proc_t *, boolean_t); extern void brand_clearbrand(proc_t *, boolean_t); /* @@ -178,17 +250,16 @@ extern int brand_solaris_cmd(int, uintptr_t, uintptr_t, uintptr_t, extern void brand_solaris_copy_procdata(proc_t *, proc_t *, struct brand *); extern int brand_solaris_elfexec(vnode_t *, execa_t *, uarg_t *, - intpdata_t *, int, long *, int, caddr_t, cred_t *, int, - struct brand *, char *, char *, char *, char *, char *); + intpdata_t *, int, long *, int, caddr_t, cred_t *, int *, + struct brand *, char *, char *, char *); extern void brand_solaris_exec(struct brand *); extern int brand_solaris_fini(char **, struct modlinkage *, struct brand *); extern void brand_solaris_forklwp(klwp_t *, klwp_t *, struct brand *); extern void brand_solaris_freelwp(klwp_t *, struct brand *); -extern int brand_solaris_initlwp(klwp_t *, struct brand *); +extern void brand_solaris_initlwp(klwp_t *, struct brand *); extern void brand_solaris_lwpexit(klwp_t *, struct brand *); -extern void brand_solaris_proc_exit(struct proc *, klwp_t *, - struct brand *); +extern void brand_solaris_proc_exit(struct proc *, struct brand *); extern void brand_solaris_setbrand(proc_t *, struct brand *); #if defined(_SYSCALL32) diff --git a/usr/src/uts/common/sys/buf.h b/usr/src/uts/common/sys/buf.h index a9191aed7c..cb8a6012fc 100644 --- a/usr/src/uts/common/sys/buf.h +++ b/usr/src/uts/common/sys/buf.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -186,6 +187,7 @@ struct biostats { #define B_STARTED 0x2000000 /* io:::start probe called for buf */ #define B_ABRWRITE 0x4000000 /* Application based recovery active */ #define B_PAGE_NOWAIT 0x8000000 /* Skip the page if it is locked */ +#define B_INVALCURONLY 0x10000000 /* invalidate only for curproc */ /* * There is some confusion over the meaning of B_FREE and B_INVAL and what @@ -198,6 +200,12 @@ struct biostats { * between the sole use of these two flags. In both cases, IO will be done * if the page is not yet committed to storage. * + * The B_INVALCURONLY flag modifies the behavior of the B_INVAL flag and is + * intended to be used in conjunction with B_INVAL. B_INVALCURONLY has no + * meaning on its own. When both B_INVALCURONLY and B_INVAL are set, then + * the mapping for the page is only invalidated for the current process. + * In this case, the page is not destroyed unless this was the final mapping. + * * In order to discard pages without writing them back, (B_INVAL | B_TRUNC) * should be used. * diff --git a/usr/src/uts/common/sys/contract/process.h b/usr/src/uts/common/sys/contract/process.h index 21cf94dcf9..2c70d7c9f1 100644 --- a/usr/src/uts/common/sys/contract/process.h +++ b/usr/src/uts/common/sys/contract/process.h @@ -21,13 +21,12 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_CONTRACT_PROCESS_H #define _SYS_CONTRACT_PROCESS_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/contract.h> #include <sys/time.h> @@ -55,7 +54,8 @@ typedef struct cont_process cont_process_t; #define CT_PR_NOORPHAN 0x2 /* kill when contract is abandoned */ #define CT_PR_PGRPONLY 0x4 /* only kill process group on fatal errors */ #define CT_PR_REGENT 0x8 /* automatically detach inherited contracts */ -#define CT_PR_ALLPARAM 0xf +#define CT_PR_KEEP_EXEC 0x10 /* preserve template accross exec */ +#define CT_PR_ALLPARAM 0x1f /* * ctr_ev_* flags diff --git a/usr/src/uts/common/sys/cpucaps.h b/usr/src/uts/common/sys/cpucaps.h index 6063ff4380..6bc042108c 100644 --- a/usr/src/uts/common/sys/cpucaps.h +++ b/usr/src/uts/common/sys/cpucaps.h @@ -22,6 +22,7 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011, 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_CPUCAPS_H @@ -84,12 +85,16 @@ extern void cpucaps_zone_remove(zone_t *); */ extern int cpucaps_project_set(kproject_t *, rctl_qty_t); extern int cpucaps_zone_set(zone_t *, rctl_qty_t); +extern int cpucaps_zone_set_base(zone_t *, rctl_qty_t); +extern int cpucaps_zone_set_burst_time(zone_t *, rctl_qty_t); /* * Get current CPU usage for a project/zone. */ extern rctl_qty_t cpucaps_project_get(kproject_t *); extern rctl_qty_t cpucaps_zone_get(zone_t *); +extern rctl_qty_t cpucaps_zone_get_base(zone_t *); +extern rctl_qty_t cpucaps_zone_get_burst_time(zone_t *); /* * Scheduling class hooks into CPU caps framework. diff --git a/usr/src/uts/common/sys/cpucaps_impl.h b/usr/src/uts/common/sys/cpucaps_impl.h index 95afd21827..2cd4ed644d 100644 --- a/usr/src/uts/common/sys/cpucaps_impl.h +++ b/usr/src/uts/common/sys/cpucaps_impl.h @@ -22,6 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011, 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_CPUCAPS_IMPL_H @@ -66,8 +67,12 @@ typedef struct cpucap { waitq_t cap_waitq; /* waitq for capped threads */ kstat_t *cap_kstat; /* cpucaps specific kstat */ int64_t cap_gen; /* zone cap specific */ + hrtime_t cap_chk_value; /* effective CPU usage cap */ hrtime_t cap_value; /* scaled CPU usage cap */ hrtime_t cap_usage; /* current CPU usage */ + hrtime_t cap_base; /* base CPU for burst */ + u_longlong_t cap_burst_limit; /* max secs (in tics) for a burst */ + u_longlong_t cap_bursting; /* # of ticks currently bursting */ disp_lock_t cap_usagelock; /* protects cap_usage above */ /* * Per cap statistics. @@ -75,6 +80,7 @@ typedef struct cpucap { hrtime_t cap_maxusage; /* maximum cap usage */ u_longlong_t cap_below; /* # of ticks spend below the cap */ u_longlong_t cap_above; /* # of ticks spend above the cap */ + u_longlong_t cap_above_base; /* # of ticks spent above the base */ } cpucap_t; /* diff --git a/usr/src/uts/common/sys/cred.h b/usr/src/uts/common/sys/cred.h index 5056f9a511..914f132dc0 100644 --- a/usr/src/uts/common/sys/cred.h +++ b/usr/src/uts/common/sys/cred.h @@ -93,6 +93,7 @@ extern gid_t crgetgid(const cred_t *); extern gid_t crgetrgid(const cred_t *); extern gid_t crgetsgid(const cred_t *); extern zoneid_t crgetzoneid(const cred_t *); +extern zoneid_t crgetzonedid(const cred_t *); extern projid_t crgetprojid(const cred_t *); extern cred_t *crgetmapped(const cred_t *); diff --git a/usr/src/uts/common/sys/ctf_api.h b/usr/src/uts/common/sys/ctf_api.h index 04d73c3181..bc99f67d3f 100644 --- a/usr/src/uts/common/sys/ctf_api.h +++ b/usr/src/uts/common/sys/ctf_api.h @@ -24,7 +24,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. */ /* @@ -60,6 +60,65 @@ extern "C" { typedef struct ctf_file ctf_file_t; typedef long ctf_id_t; +#define ECTF_BASE 1000 /* base value for libctf errnos */ + +enum { + ECTF_FMT = ECTF_BASE, /* file is not in CTF or ELF format */ + ECTF_ELFVERS, /* ELF version is more recent than libctf */ + ECTF_CTFVERS, /* CTF version is more recent than libctf */ + ECTF_ENDIAN, /* data is different endian-ness than lib */ + ECTF_SYMTAB, /* symbol table uses invalid entry size */ + ECTF_SYMBAD, /* symbol table data buffer invalid */ + ECTF_STRBAD, /* string table data buffer invalid */ + ECTF_CORRUPT, /* file data corruption detected */ + ECTF_NOCTFDATA, /* ELF file does not contain CTF data */ + ECTF_NOCTFBUF, /* buffer does not contain CTF data */ + ECTF_NOSYMTAB, /* symbol table data is not available */ + ECTF_NOPARENT, /* parent CTF container is not available */ + ECTF_DMODEL, /* data model mismatch */ + ECTF_MMAP, /* failed to mmap a data section */ + ECTF_ZMISSING, /* decompression library not installed */ + ECTF_ZINIT, /* failed to initialize decompression library */ + ECTF_ZALLOC, /* failed to allocate decompression buffer */ + ECTF_DECOMPRESS, /* failed to decompress CTF data */ + ECTF_STRTAB, /* string table for this string is missing */ + ECTF_BADNAME, /* string offset is corrupt w.r.t. strtab */ + ECTF_BADID, /* invalid type ID number */ + ECTF_NOTSOU, /* type is not a struct or union */ + ECTF_NOTENUM, /* type is not an enum */ + ECTF_NOTSUE, /* type is not a struct, union, or enum */ + ECTF_NOTINTFP, /* type is not an integer or float */ + ECTF_NOTARRAY, /* type is not an array */ + ECTF_NOTREF, /* type does not reference another type */ + ECTF_NAMELEN, /* buffer is too small to hold type name */ + ECTF_NOTYPE, /* no type found corresponding to name */ + ECTF_SYNTAX, /* syntax error in type name */ + ECTF_NOTFUNC, /* symtab entry does not refer to a function */ + ECTF_NOFUNCDAT, /* no func info available for function */ + ECTF_NOTDATA, /* symtab entry does not refer to a data obj */ + ECTF_NOTYPEDAT, /* no type info available for object */ + ECTF_NOLABEL, /* no label found corresponding to name */ + ECTF_NOLABELDATA, /* file does not contain any labels */ + ECTF_NOTSUP, /* feature not supported */ + ECTF_NOENUMNAM, /* enum element name not found */ + ECTF_NOMEMBNAM, /* member name not found */ + ECTF_RDONLY, /* CTF container is read-only */ + ECTF_DTFULL, /* CTF type is full (no more members allowed) */ + ECTF_FULL, /* CTF container is full */ + ECTF_DUPMEMBER, /* duplicate member name definition */ + ECTF_CONFLICT, /* conflicting type definition present */ + ECTF_REFERENCED, /* type has outstanding references */ + ECTF_NOTDYN, /* type is not a dynamic type */ + ECTF_ELF, /* elf library failure */ + ECTF_MCHILD, /* cannot merge child container */ + ECTF_LABELEXISTS, /* label already exists */ + ECTF_LCONFLICT, /* merged labels conflict */ + ECTF_ZLIB, /* zlib library failure */ + ECTF_CONVBKERR, /* CTF conversion backend error */ + ECTF_CONVNOCSRC, /* No C source to convert from */ + ECTF_NOCONVBKEND /* No applicable conversion backend */ +}; + /* * If the debugger needs to provide the CTF library with a set of raw buffers * for use as the CTF data, symbol table, and string table, it can do so by @@ -143,19 +202,24 @@ typedef struct ctf_lblinfo { typedef int ctf_visit_f(const char *, ctf_id_t, ulong_t, int, void *); typedef int ctf_member_f(const char *, ctf_id_t, ulong_t, void *); typedef int ctf_enum_f(const char *, int, void *); -typedef int ctf_type_f(ctf_id_t, void *); +typedef int ctf_type_f(ctf_id_t, boolean_t, void *); typedef int ctf_label_f(const char *, const ctf_lblinfo_t *, void *); +typedef int ctf_function_f(const char *, ulong_t, ctf_funcinfo_t *, void *); +typedef int ctf_object_f(const char *, ctf_id_t, ulong_t, void *); +typedef int ctf_string_f(const char *, void *); extern ctf_file_t *ctf_bufopen(const ctf_sect_t *, const ctf_sect_t *, const ctf_sect_t *, int *); extern ctf_file_t *ctf_fdopen(int, int *); extern ctf_file_t *ctf_open(const char *, int *); extern ctf_file_t *ctf_create(int *); +extern ctf_file_t *ctf_fdcreate(int, int *); extern ctf_file_t *ctf_dup(ctf_file_t *); extern void ctf_close(ctf_file_t *); extern ctf_file_t *ctf_parent_file(ctf_file_t *); extern const char *ctf_parent_name(ctf_file_t *); +extern const char *ctf_parent_label(ctf_file_t *); extern int ctf_import(ctf_file_t *, ctf_file_t *); extern int ctf_setmodel(ctf_file_t *, int); @@ -165,15 +229,20 @@ extern void ctf_setspecific(ctf_file_t *, void *); extern void *ctf_getspecific(ctf_file_t *); extern int ctf_errno(ctf_file_t *); +extern uint_t ctf_flags(ctf_file_t *); extern const char *ctf_errmsg(int); extern int ctf_version(int); extern int ctf_func_info(ctf_file_t *, ulong_t, ctf_funcinfo_t *); +extern int ctf_func_info_by_id(ctf_file_t *, ctf_id_t, ctf_funcinfo_t *); extern int ctf_func_args(ctf_file_t *, ulong_t, uint_t, ctf_id_t *); +extern int ctf_func_args_by_id(ctf_file_t *, ctf_id_t, uint_t, ctf_id_t *); extern ctf_id_t ctf_lookup_by_name(ctf_file_t *, const char *); extern ctf_id_t ctf_lookup_by_symbol(ctf_file_t *, ulong_t); +extern char *ctf_symbol_name(ctf_file_t *, ulong_t, char *, size_t); + extern ctf_id_t ctf_type_resolve(ctf_file_t *, ctf_id_t); extern ssize_t ctf_type_lname(ctf_file_t *, ctf_id_t, char *, size_t); extern char *ctf_type_name(ctf_file_t *, ctf_id_t, char *, size_t); @@ -182,6 +251,7 @@ extern char *ctf_type_qname(ctf_file_t *, ctf_id_t, char *, size_t, extern ssize_t ctf_type_size(ctf_file_t *, ctf_id_t); extern ssize_t ctf_type_align(ctf_file_t *, ctf_id_t); extern int ctf_type_kind(ctf_file_t *, ctf_id_t); +extern const char *ctf_kind_name(ctf_file_t *, int); extern ctf_id_t ctf_type_reference(ctf_file_t *, ctf_id_t); extern ctf_id_t ctf_type_pointer(ctf_file_t *, ctf_id_t); extern int ctf_type_encoding(ctf_file_t *, ctf_id_t, ctf_encoding_t *); @@ -201,37 +271,50 @@ extern int ctf_label_info(ctf_file_t *, const char *, ctf_lblinfo_t *); extern int ctf_member_iter(ctf_file_t *, ctf_id_t, ctf_member_f *, void *); extern int ctf_enum_iter(ctf_file_t *, ctf_id_t, ctf_enum_f *, void *); -extern int ctf_type_iter(ctf_file_t *, ctf_type_f *, void *); +extern int ctf_type_iter(ctf_file_t *, boolean_t, ctf_type_f *, void *); extern int ctf_label_iter(ctf_file_t *, ctf_label_f *, void *); +extern int ctf_function_iter(ctf_file_t *, ctf_function_f *, void *); +extern int ctf_object_iter(ctf_file_t *, ctf_object_f *, void *); +extern int ctf_string_iter(ctf_file_t *, ctf_string_f *, void *); extern ctf_id_t ctf_add_array(ctf_file_t *, uint_t, const ctf_arinfo_t *); -extern ctf_id_t ctf_add_const(ctf_file_t *, uint_t, ctf_id_t); +extern ctf_id_t ctf_add_const(ctf_file_t *, uint_t, const char *, ctf_id_t); extern ctf_id_t ctf_add_enum(ctf_file_t *, uint_t, const char *); extern ctf_id_t ctf_add_float(ctf_file_t *, uint_t, const char *, const ctf_encoding_t *); extern ctf_id_t ctf_add_forward(ctf_file_t *, uint_t, const char *, uint_t); -extern ctf_id_t ctf_add_function(ctf_file_t *, uint_t, - const ctf_funcinfo_t *, const ctf_id_t *); +extern ctf_id_t ctf_add_funcptr(ctf_file_t *, uint_t, const ctf_funcinfo_t *, + const ctf_id_t *); extern ctf_id_t ctf_add_integer(ctf_file_t *, uint_t, const char *, const ctf_encoding_t *); -extern ctf_id_t ctf_add_pointer(ctf_file_t *, uint_t, ctf_id_t); +extern ctf_id_t ctf_add_pointer(ctf_file_t *, uint_t, const char *, ctf_id_t); extern ctf_id_t ctf_add_type(ctf_file_t *, ctf_file_t *, ctf_id_t); extern ctf_id_t ctf_add_typedef(ctf_file_t *, uint_t, const char *, ctf_id_t); -extern ctf_id_t ctf_add_restrict(ctf_file_t *, uint_t, ctf_id_t); +extern ctf_id_t ctf_add_restrict(ctf_file_t *, uint_t, const char *, ctf_id_t); extern ctf_id_t ctf_add_struct(ctf_file_t *, uint_t, const char *); extern ctf_id_t ctf_add_union(ctf_file_t *, uint_t, const char *); -extern ctf_id_t ctf_add_volatile(ctf_file_t *, uint_t, ctf_id_t); +extern ctf_id_t ctf_add_volatile(ctf_file_t *, uint_t, const char *, ctf_id_t); extern int ctf_add_enumerator(ctf_file_t *, ctf_id_t, const char *, int); -extern int ctf_add_member(ctf_file_t *, ctf_id_t, const char *, ctf_id_t); +extern int ctf_add_member(ctf_file_t *, ctf_id_t, const char *, ctf_id_t, + ulong_t); + + +extern int ctf_add_function(ctf_file_t *, ulong_t, const ctf_funcinfo_t *, + const ctf_id_t *); +extern int ctf_add_object(ctf_file_t *, ulong_t, ctf_id_t); +extern int ctf_add_label(ctf_file_t *, const char *, ctf_id_t, uint_t); extern int ctf_set_array(ctf_file_t *, ctf_id_t, const ctf_arinfo_t *); +extern int ctf_set_root(ctf_file_t *, ctf_id_t, const boolean_t); +extern int ctf_set_size(ctf_file_t *, ctf_id_t, const ulong_t); extern int ctf_delete_type(ctf_file_t *, ctf_id_t); extern int ctf_update(ctf_file_t *); extern int ctf_discard(ctf_file_t *); extern int ctf_write(ctf_file_t *, int); +extern void ctf_dataptr(ctf_file_t *, const void **, size_t *); #ifdef _KERNEL diff --git a/usr/src/uts/common/sys/dktp/dadk.h b/usr/src/uts/common/sys/dktp/dadk.h index f5c990e7c0..2178ad1f0d 100644 --- a/usr/src/uts/common/sys/dktp/dadk.h +++ b/usr/src/uts/common/sys/dktp/dadk.h @@ -65,6 +65,8 @@ struct dadk { kstat_t *dad_errstats; /* error stats */ kmutex_t dad_cmd_mutex; int dad_cmd_count; + uint32_t dad_err_cnt; /* number of recent errors */ + hrtime_t dad_last_log; /* time of last error log */ }; #define DAD_SECSIZ dad_phyg.g_secsiz diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index fb2a0749d3..4cd93be56e 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011 Joyent, Inc. All rights reserved. */ #ifndef _SYS_DLD_H @@ -191,6 +192,7 @@ typedef struct dld_ioc_rename { datalink_id_t dir_linkid1; datalink_id_t dir_linkid2; char dir_link[MAXLINKNAMELEN]; + boolean_t dir_zoneinit; } dld_ioc_rename_t; /* @@ -203,6 +205,7 @@ typedef struct dld_ioc_rename { typedef struct dld_ioc_zid { zoneid_t diz_zid; datalink_id_t diz_linkid; + boolean_t diz_transient; } dld_ioc_zid_t; /* @@ -350,6 +353,7 @@ typedef struct dld_hwgrpinfo { */ typedef int (*dld_capab_func_t)(void *, uint_t, void *, uint_t); +#define DI_DIRECT_RAW 0x1 /* * Direct Tx/Rx capability. */ @@ -374,6 +378,9 @@ typedef struct dld_capab_direct_s { /* flow control "can I put on a ring" callback */ uintptr_t di_tx_fctl_df; /* canput-like callback */ void *di_tx_fctl_dh; + + /* flags that control our behavior */ + uint_t di_flags; } dld_capab_direct_t; /* diff --git a/usr/src/uts/common/sys/dld_impl.h b/usr/src/uts/common/sys/dld_impl.h index a76a927e59..81708aad38 100644 --- a/usr/src/uts/common/sys/dld_impl.h +++ b/usr/src/uts/common/sys/dld_impl.h @@ -53,7 +53,8 @@ typedef enum { typedef enum { DLD_UNINITIALIZED, DLD_PASSIVE, - DLD_ACTIVE + DLD_ACTIVE, + DLD_EXCLUSIVE } dld_passivestate_t; /* @@ -256,6 +257,8 @@ extern void dld_str_rx_unitdata(void *, mac_resource_handle_t, extern void dld_str_notify_ind(dld_str_t *); extern mac_tx_cookie_t str_mdata_fastpath_put(dld_str_t *, mblk_t *, uintptr_t, uint16_t); +extern mac_tx_cookie_t str_mdata_raw_fastpath_put(dld_str_t *, mblk_t *, + uintptr_t, uint16_t); extern int dld_flow_ctl_callb(dld_str_t *, uint64_t, int (*func)(), void *); diff --git a/usr/src/uts/common/sys/dld_ioc.h b/usr/src/uts/common/sys/dld_ioc.h index 2f519a8eda..093a4dc0c3 100644 --- a/usr/src/uts/common/sys/dld_ioc.h +++ b/usr/src/uts/common/sys/dld_ioc.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_DLD_IOC_H @@ -59,6 +60,7 @@ extern "C" { #define IPTUN_IOC 0x454A #define BRIDGE_IOC 0xB81D #define IBPART_IOC 0x6171 +#define OVERLAY_IOC 0x2005 /* GLDv3 modules use these macros to generate unique ioctl commands */ #define DLDIOC(cmdid) DLD_IOC_CMD(DLD_IOC, (cmdid)) @@ -68,6 +70,7 @@ extern "C" { #define IPTUNIOC(cmdid) DLD_IOC_CMD(IPTUN_IOC, (cmdid)) #define BRIDGEIOC(cmdid) DLD_IOC_CMD(BRIDGE_IOC, (cmdid)) #define IBPARTIOC(cmdid) DLD_IOC_CMD(IBPART_IOC, (cmdid)) +#define OVERLAYIOC(cmdid) DLD_IOC_CMD(OVERLAY_IOC, (cmdid)) #ifdef _KERNEL diff --git a/usr/src/uts/common/sys/dlpi.h b/usr/src/uts/common/sys/dlpi.h index 5bc2bd41c5..dddac5b878 100644 --- a/usr/src/uts/common/sys/dlpi.h +++ b/usr/src/uts/common/sys/dlpi.h @@ -107,6 +107,7 @@ typedef struct dl_ipnetinfo { #define DL_PASSIVE_REQ 0x114 /* Allow access to aggregated link */ #define DL_INTR_MODE_REQ 0x115 /* Request Rx processing in INTR mode */ #define DL_NOTIFY_CONF 0x116 /* Notification from upstream */ +#define DL_EXCLUSIVE_REQ 0x117 /* Make bind active */ /* * Primitives used for Connectionless Service @@ -388,6 +389,7 @@ typedef struct dl_ipnetinfo { #define DL_PROMISC_PHYS 0x01 /* promiscuous mode at phys level */ #define DL_PROMISC_SAP 0x02 /* promiscuous mode at sap level */ #define DL_PROMISC_MULTI 0x03 /* promiscuous mode for multicast */ +#define DL_PROMISC_RX_ONLY 0x04 /* above only enabled for rx */ /* * DLPI notification codes for DL_NOTIFY_REQ primitives. @@ -1107,6 +1109,13 @@ typedef struct { } dl_intr_mode_req_t; /* + * DL_EXCLUSIVE_REQ, M_PROTO type + */ +typedef struct { + t_uscalar_t dl_primitive; +} dl_exclusive_req_t; + +/* * CONNECTION-ORIENTED SERVICE PRIMITIVES */ @@ -1528,6 +1537,7 @@ union DL_primitives { dl_control_ack_t control_ack; dl_passive_req_t passive_req; dl_intr_mode_req_t intr_mode_req; + dl_exclusive_req_t exclusive_req; }; #define DL_INFO_REQ_SIZE sizeof (dl_info_req_t) @@ -1596,6 +1606,7 @@ union DL_primitives { #define DL_CONTROL_ACK_SIZE sizeof (dl_control_ack_t) #define DL_PASSIVE_REQ_SIZE sizeof (dl_passive_req_t) #define DL_INTR_MODE_REQ_SIZE sizeof (dl_intr_mode_req_t) +#define DL_EXCLUSIVE_REQ_SIZE sizeof (dl_exclusive_req_t) #ifdef _KERNEL /* diff --git a/usr/src/uts/common/sys/dls.h b/usr/src/uts/common/sys/dls.h index 6bd2bbe35a..155cad8bc9 100644 --- a/usr/src/uts/common/sys/dls.h +++ b/usr/src/uts/common/sys/dls.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_DLS_H @@ -85,6 +86,7 @@ typedef struct dls_link_s dls_link_t; #define DLS_PROMISC_SAP 0x00000001 #define DLS_PROMISC_MULTI 0x00000002 #define DLS_PROMISC_PHYS 0x00000004 +#define DLS_PROMISC_RX_ONLY 0x00000008 extern int dls_open(dls_link_t *, dls_dl_handle_t, dld_str_t *); extern void dls_close(dld_str_t *); @@ -106,11 +108,13 @@ extern void str_notify(void *, mac_notify_type_t); extern int dls_devnet_open(const char *, dls_dl_handle_t *, dev_t *); +extern int dls_devnet_open_in_zone(const char *, + dls_dl_handle_t *, dev_t *, zoneid_t); extern void dls_devnet_close(dls_dl_handle_t); extern boolean_t dls_devnet_rebuild(); extern int dls_devnet_rename(datalink_id_t, datalink_id_t, - const char *); + const char *, boolean_t); extern int dls_devnet_create(mac_handle_t, datalink_id_t, zoneid_t); extern int dls_devnet_destroy(mac_handle_t, datalink_id_t *, @@ -122,12 +126,13 @@ extern int dls_devnet_hold_by_dev(dev_t, dls_dl_handle_t *); extern void dls_devnet_rele(dls_dl_handle_t); extern void dls_devnet_prop_task_wait(dls_dl_handle_t); +extern const char *dls_devnet_link(dls_dl_handle_t); extern const char *dls_devnet_mac(dls_dl_handle_t); extern uint16_t dls_devnet_vid(dls_dl_handle_t); extern datalink_id_t dls_devnet_linkid(dls_dl_handle_t); extern int dls_devnet_dev2linkid(dev_t, datalink_id_t *); extern int dls_devnet_phydev(datalink_id_t, dev_t *); -extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t); +extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t, boolean_t); extern zoneid_t dls_devnet_getzid(dls_dl_handle_t); extern zoneid_t dls_devnet_getownerzid(dls_dl_handle_t); extern boolean_t dls_devnet_islinkvisible(datalink_id_t, zoneid_t); @@ -141,6 +146,8 @@ extern int dls_mgmt_update(const char *, uint32_t, boolean_t, extern int dls_mgmt_get_linkinfo(datalink_id_t, char *, datalink_class_t *, uint32_t *, uint32_t *); extern int dls_mgmt_get_linkid(const char *, datalink_id_t *); +extern int dls_mgmt_get_linkid_in_zone(const char *, + datalink_id_t *, zoneid_t); extern datalink_id_t dls_mgmt_get_next(datalink_id_t, datalink_class_t, datalink_media_t, uint32_t); extern int dls_devnet_macname2linkid(const char *, diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h index 60f51c47b5..329f8dd08e 100644 --- a/usr/src/uts/common/sys/dls_impl.h +++ b/usr/src/uts/common/sys/dls_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_DLS_IMPL_H @@ -46,11 +47,12 @@ typedef struct dls_multicst_addr_s { } dls_multicst_addr_t; struct dls_link_s { /* Protected by */ - char dl_name[MAXNAMELEN]; /* SL */ + char dl_name[MAXNAMELEN]; /* RO */ uint_t dl_ddi_instance; /* SL */ mac_handle_t dl_mh; /* SL */ mac_client_handle_t dl_mch; /* SL */ mac_unicast_handle_t dl_mah; /* SL */ + mac_notify_handle_t dl_mnh; /* SL */ const mac_info_t *dl_mip; /* SL */ uint_t dl_ref; /* SL */ mod_hash_t *dl_str_hash; /* SL, modhash lock */ @@ -61,6 +63,7 @@ struct dls_link_s { /* Protected by */ uint_t dl_zone_ref; link_tagmode_t dl_tagmode; /* atomic */ uint_t dl_nonip_cnt; /* SL */ + uint_t dl_exclusive; /* SL */ }; typedef struct dls_head_s { @@ -96,13 +99,16 @@ extern void dls_create_str_kstats(dld_str_t *); extern int dls_stat_update(kstat_t *, dls_link_t *, int); extern int dls_stat_create(const char *, int, const char *, zoneid_t, int (*)(struct kstat *, int), void *, - kstat_t **); + kstat_t **, zoneid_t); +extern void dls_stat_delete(kstat_t *); extern int dls_devnet_open_by_dev(dev_t, dls_link_t **, dls_dl_handle_t *); extern int dls_devnet_hold_link(datalink_id_t, dls_dl_handle_t *, dls_link_t **); extern void dls_devnet_rele_link(dls_dl_handle_t, dls_link_t *); +extern int dls_devnet_hold_tmp_by_link(dls_link_t *, + dls_dl_handle_t *); extern void dls_init(void); extern int dls_fini(void); @@ -126,6 +132,7 @@ extern void dls_mgmt_init(void); extern void dls_mgmt_fini(void); extern int dls_mgmt_get_phydev(datalink_id_t, dev_t *); +extern int dls_exclusive_set(dld_str_t *, boolean_t); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/dls_mgmt.h b/usr/src/uts/common/sys/dls_mgmt.h index b4032c24d6..214e225ac9 100644 --- a/usr/src/uts/common/sys/dls_mgmt.h +++ b/usr/src/uts/common/sys/dls_mgmt.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _DLS_MGMT_H @@ -46,13 +47,15 @@ typedef enum { DATALINK_CLASS_SIMNET = 0x20, DATALINK_CLASS_BRIDGE = 0x40, DATALINK_CLASS_IPTUN = 0x80, - DATALINK_CLASS_PART = 0x100 + DATALINK_CLASS_PART = 0x100, + DATALINK_CLASS_OVERLAY = 0x200 } datalink_class_t; #define DATALINK_CLASS_ALL (DATALINK_CLASS_PHYS | \ DATALINK_CLASS_VLAN | DATALINK_CLASS_AGGR | DATALINK_CLASS_VNIC | \ DATALINK_CLASS_ETHERSTUB | DATALINK_CLASS_SIMNET | \ - DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN | DATALINK_CLASS_PART) + DATALINK_CLASS_BRIDGE | DATALINK_CLASS_IPTUN | DATALINK_CLASS_PART | \ + DATALINK_CLASS_OVERLAY) /* * A combination of flags and media. @@ -165,6 +168,7 @@ typedef struct dlmgmt_door_getname { typedef struct dlmgmt_door_getlinkid { int ld_cmd; char ld_link[MAXLINKNAMELEN]; + zoneid_t ld_zoneid; } dlmgmt_door_getlinkid_t; typedef struct dlmgmt_door_getnext_s { @@ -225,6 +229,7 @@ typedef struct dlmgmt_getattr_retval_s { char lr_attrval[MAXLINKATTRVALLEN]; } dlmgmt_getattr_retval_t; + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h index d36bc20481..38822cbbb0 100644 --- a/usr/src/uts/common/sys/exec.h +++ b/usr/src/uts/common/sys/exec.h @@ -26,6 +26,10 @@ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ +/* + * Copyright 2015, Joyent, Inc. + */ + #ifndef _SYS_EXEC_H #define _SYS_EXEC_H @@ -102,10 +106,12 @@ typedef struct uarg { vnode_t *ex_vp; char *emulator; char *brandname; + const char *brand_nroot; char *auxp_auxflags; /* addr of auxflags auxv on the user stack */ char *auxp_brand; /* address of first brand auxv on user stack */ cred_t *pfcred; boolean_t scrubenv; + uintptr_t maxstack; } uarg_t; /* @@ -157,10 +163,11 @@ typedef struct uarg { #endif #define INTPSZ MAXPATHLEN +#define INTP_MAXDEPTH 5 /* Nested interpreter depth matches Linux */ typedef struct intpdata { char *intp; - char *intp_name; - char *intp_arg; + char *intp_name[INTP_MAXDEPTH]; + char *intp_arg[INTP_MAXDEPTH]; } intpdata_t; #define EXECSETID_SETID 0x1 /* setid exec */ @@ -174,7 +181,7 @@ struct execsw { int (*exec_func)(struct vnode *vp, struct execa *uap, struct uarg *args, struct intpdata *idata, int level, long *execsz, int setid, caddr_t exec_file, - struct cred *cred, int brand_action); + struct cred *cred, int *brand_action); int (*exec_core)(struct vnode *vp, struct proc *p, struct cred *cred, rlim64_t rlimit, int sig, core_content_t content); @@ -212,7 +219,7 @@ extern int exec_common(const char *fname, const char **argp, const char **envp, int brand_action); extern int gexec(vnode_t **vp, struct execa *uap, struct uarg *args, struct intpdata *idata, int level, long *execsz, caddr_t exec_file, - struct cred *cred, int brand_action); + struct cred *cred, int *brand_action); extern struct execsw *allocate_execsw(char *name, char *magic, size_t magic_size); extern struct execsw *findexecsw(char *magic); @@ -237,16 +244,18 @@ extern void exec_set_sp(size_t); * when compiling the 32-bit compatability elf code in the elfexec module. */ extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, - long *, int, caddr_t, cred_t *, int); + long *, int, caddr_t, cred_t *, int *); extern int mapexec_brand(vnode_t *, uarg_t *, Ehdr *, Addr *, - intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *); + intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *, + uintptr_t *, uintptr_t *); #endif /* !_ELF32_COMPAT */ #if defined(_LP64) extern int elf32exec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, - long *, int, caddr_t, cred_t *, int); + long *, int, caddr_t, cred_t *, int *); extern int mapexec32_brand(vnode_t *, uarg_t *, Elf32_Ehdr *, Elf32_Addr *, - intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *, uintptr_t *); + intptr_t *, caddr_t, char **, caddr_t *, caddr_t *, size_t *, + uintptr_t *, uintptr_t *); #endif /* _LP64 */ /* diff --git a/usr/src/uts/common/sys/fcntl.h b/usr/src/uts/common/sys/fcntl.h index aa74cab8b5..5ddade90e4 100644 --- a/usr/src/uts/common/sys/fcntl.h +++ b/usr/src/uts/common/sys/fcntl.h @@ -37,6 +37,7 @@ */ /* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */ +/* Copyright 2015, Joyent, Inc. */ #ifndef _SYS_FCNTL_H #define _SYS_FCNTL_H @@ -145,6 +146,7 @@ extern "C" { * the large and small file environments; therefore, the #defined values must * as well. * The NBMAND forms are private and should not be used. + * The FLOCK forms are also private and should not be used. */ #if defined(_LP64) || _FILE_OFFSET_BITS == 32 @@ -155,6 +157,13 @@ extern "C" { #define F_FREESP 11 /* Free file space */ #define F_GETLK 14 /* Get file lock */ #define F_SETLK_NBMAND 42 /* private */ +#if !defined(_STRICT_SYMBOLS) +#define F_OFD_GETLK 47 /* Get file lock owned by file */ +#define F_OFD_SETLK 48 /* Set file lock owned by file */ +#define F_OFD_SETLKW 49 /* Set file lock owned by file and wait */ +#define F_FLOCK 53 /* private - set flock owned by file */ +#define F_FLOCKW 54 /* private - set flock owned by file and wait */ +#endif /* _STRICT_SYMBOLS */ #else /* ILP32 large file application compilation environment version */ #define F_SETLK 34 /* Set file lock */ @@ -163,6 +172,13 @@ extern "C" { #define F_FREESP 27 /* Free file space */ #define F_GETLK 33 /* Get file lock */ #define F_SETLK_NBMAND 44 /* private */ +#if !defined(_STRICT_SYMBOLS) +#define F_OFD_GETLK 50 /* Get file lock owned by file */ +#define F_OFD_SETLK 51 /* Set file lock owned by file */ +#define F_OFD_SETLKW 52 /* Set file lock owned by file and wait */ +#define F_FLOCK 55 /* private - set flock owned by file */ +#define F_FLOCKW 56 /* private - set flock owned by file and wait */ +#endif /* _STRICT_SYMBOLS */ #endif /* _LP64 || _FILE_OFFSET_BITS == 32 */ #if defined(_LARGEFILE64_SOURCE) @@ -180,6 +196,13 @@ extern "C" { #define F_FREESP64 27 /* Free file space */ #define F_GETLK64 33 /* Get file lock */ #define F_SETLK64_NBMAND 44 /* private */ +#if !defined(_STRICT_SYMBOLS) +#define F_OFD_GETLK64 50 /* Get file lock owned by file */ +#define F_OFD_SETLK64 51 /* Set file lock owned by file */ +#define F_OFD_SETLKW64 52 /* Set file lock owned by file and wait */ +#define F_FLOCK64 55 /* private - set flock owned by file */ +#define F_FLOCKW64 56 /* private - set flock owned by file and wait */ +#endif /* _STRICT_SYMBOLS */ #else #define F_SETLK64 6 /* Set file lock */ #define F_SETLKW64 7 /* Set file lock and wait */ @@ -187,6 +210,13 @@ extern "C" { #define F_FREESP64 11 /* Free file space */ #define F_GETLK64 14 /* Get file lock */ #define F_SETLK64_NBMAND 42 /* private */ +#if !defined(_STRICT_SYMBOLS) +#define F_OFD_GETLK64 47 /* Get file lock owned by file */ +#define F_OFD_SETLK64 48 /* Set file lock owned by file */ +#define F_OFD_SETLKW64 49 /* Set file lock owned by file and wait */ +#define F_FLOCK64 53 /* private - set flock owned by file */ +#define F_FLOCKW64 54 /* private - set flock owned by file and wait */ +#endif /* _STRICT_SYMBOLS */ #endif /* !_LP64 || _KERNEL */ #endif /* _LARGEFILE64_SOURCE */ diff --git a/usr/src/uts/common/sys/file.h b/usr/src/uts/common/sys/file.h index 1f736d3d01..7e297042af 100644 --- a/usr/src/uts/common/sys/file.h +++ b/usr/src/uts/common/sys/file.h @@ -27,6 +27,7 @@ /* All Rights Reserved */ /* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */ +/* Copyright 2015 Joyent, Inc. */ #ifndef _SYS_FILE_H #define _SYS_FILE_H @@ -54,7 +55,7 @@ extern "C" { */ /* * One file structure is allocated for each open/creat/pipe call. - * Main use is to hold the read/write pointer associated with + * Main use is to hold the read/write pointer (and OFD locks) associated with * each open file. */ typedef struct file { @@ -66,6 +67,7 @@ typedef struct file { struct cred *f_cred; /* credentials of user who opened it */ struct f_audit_data *f_audit_data; /* file audit data */ int f_count; /* reference count */ + struct filock *f_filock; /* ptr to single lock_descriptor_t */ } file_t; /* @@ -173,6 +175,19 @@ typedef struct fpollinfo { #define L_SET 0 /* for lseek */ #endif /* L_SET */ +/* + * For flock(3C). These really don't belong here but for historical reasons + * the interface defines them to be here. + */ +#define LOCK_SH 1 +#define LOCK_EX 2 +#define LOCK_NB 4 +#define LOCK_UN 8 + +#if !defined(_STRICT_SYMBOLS) +extern int flock(int, int); +#endif + #if defined(_KERNEL) /* diff --git a/usr/src/uts/common/sys/flock.h b/usr/src/uts/common/sys/flock.h index e0cc609960..3d4b3626b9 100644 --- a/usr/src/uts/common/sys/flock.h +++ b/usr/src/uts/common/sys/flock.h @@ -29,6 +29,7 @@ */ /* * Copyright 2013 Nexenta Systems, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_FLOCK_H @@ -41,6 +42,9 @@ #include <sys/callb.h> #include <sys/param.h> #include <sys/zone.h> +#if defined(_KERNEL) +#include <sys/file.h> +#endif #ifdef __cplusplus extern "C" { @@ -221,6 +225,10 @@ typedef struct locklist { #define FLK_QUERY_ACTIVE 0x1 #define FLK_QUERY_SLEEPING 0x2 +#if defined(_KERNEL) +int ofdlock(file_t *, int, struct flock64 *, int, u_offset_t); +void ofdcleanlock(file_t *); +#endif int reclock(struct vnode *, struct flock64 *, int, int, u_offset_t, flk_callback_t *); int chklock(struct vnode *, int, u_offset_t, ssize_t, int, diff --git a/usr/src/uts/common/sys/flock_impl.h b/usr/src/uts/common/sys/flock_impl.h index d088192025..b1be1d598a 100644 --- a/usr/src/uts/common/sys/flock_impl.h +++ b/usr/src/uts/common/sys/flock_impl.h @@ -22,13 +22,12 @@ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_FLOCK_IMPL_H #define _SYS_FLOCK_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/fcntl.h> /* flock definition */ #include <sys/file.h> /* FREAD etc */ @@ -83,6 +82,7 @@ struct lock_descriptor { flk_nlm_status_t l_nlm_state; /* state of NLM server */ flk_callback_t *l_callbacks; /* callbacks, or NULL */ zoneid_t l_zoneid; /* zone of request */ + file_t *l_ofd; /* OFD-style reference */ }; typedef struct lock_descriptor lock_descriptor_t; @@ -204,7 +204,8 @@ graph_t *flk_get_lock_graph(vnode_t *, int); #define SAME_OWNER(lock1, lock2) \ (((lock1)->l_flock.l_pid == (lock2)->l_flock.l_pid) && \ - ((lock1)->l_flock.l_sysid == (lock2)->l_flock.l_sysid)) + ((lock1)->l_flock.l_sysid == (lock2)->l_flock.l_sysid) && \ + ((lock1)->l_ofd == (lock2)->l_ofd)) #define COLORED(vertex) ((vertex)->l_color == (vertex)->l_graph->mark) #define COLOR(vertex) ((vertex)->l_color = (vertex)->l_graph->mark) diff --git a/usr/src/uts/common/sys/frameio.h b/usr/src/uts/common/sys/frameio.h new file mode 100644 index 0000000000..54e6dbeedf --- /dev/null +++ b/usr/src/uts/common/sys/frameio.h @@ -0,0 +1,107 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_FRAMEIO_H +#define _SYS_FRAMEIO_H + +/* + * Frame I/O definitions + */ + +#include <sys/types.h> + +#ifdef _KERNEL +/* Kernel only headers */ +#include <sys/stream.h> +#endif /* _KERNEL */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * An individual frame vector component. Collections of these are used to make + * ioctls. + */ +typedef struct framevec { + void *fv_buf; /* Buffer with data */ + size_t fv_buflen; /* Size of the buffer */ + size_t fv_actlen; /* Amount of buffer consumed, ignore on error */ +} framevec_t; + +/* + * The base unit used with frameio. + */ +typedef struct frameio { + uint_t fio_version; /* Should always be FRAMEIO_CURRENT_VERSION */ + uint_t fio_nvpf; /* How many vectors make up one frame */ + uint_t fio_nvecs; /* The total number of vectors */ + framevec_t fio_vecs[]; /* C99 VLA */ +} frameio_t; + + +#define FRAMEIO_VERSION_ONE 1 +#define FRAMEIO_CURRENT_VERSION FRAMEIO_VERSION_ONE + +#define FRAMEIO_NVECS_MAX 32 + +/* + * Definitions for kernel modules to include as helpers. These are consolidation + * private. + */ +#ifdef _KERNEL + +/* + * 32-bit versions for 64-bit kernels + */ +typedef struct framevec32 { + caddr32_t fv_buf; + size32_t fv_buflen; + size32_t fv_actlen; +} framevec32_t; + +typedef struct frameio32 { + uint_t fio_version; + uint_t fio_vecspframe; + uint_t fio_nvecs; + framevec32_t fio_vecs[]; +} frameio32_t; + +/* + * Describe the different ways that vectors should map to frames. + */ +typedef enum frameio_write_mblk_map { + MAP_BLK_FRAME +} frameio_write_mblk_map_t; + +int frameio_init(void); +void frameio_fini(void); +frameio_t *frameio_alloc(int); +void frameio_free(frameio_t *); +int frameio_hdr_copyin(frameio_t *, int, const void *, uint_t); +int frameio_mblk_chain_read(frameio_t *, mblk_t **, int *, int); +int frameio_mblk_chain_write(frameio_t *, frameio_write_mblk_map_t, mblk_t *, + int *, int); +int frameio_hdr_copyout(frameio_t *, int, void *, uint_t); +size_t frameio_frame_length(frameio_t *, framevec_t *); +void frameio_mark_consumed(frameio_t *, int); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FRAMEIO_H */ diff --git a/usr/src/uts/common/sys/fs/hyprlofs.h b/usr/src/uts/common/sys/fs/hyprlofs.h new file mode 100644 index 0000000000..b8c4149df2 --- /dev/null +++ b/usr/src/uts/common/sys/fs/hyprlofs.h @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_FS_HYPRLOFS_H +#define _SYS_FS_HYPRLOFS_H + +#include <sys/param.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * hyprlofs ioctl numbers. + */ +#define HYPRLOFS_IOC ('H' << 8) + +#define HYPRLOFS_ADD_ENTRIES (HYPRLOFS_IOC | 1) +#define HYPRLOFS_RM_ENTRIES (HYPRLOFS_IOC | 2) +#define HYPRLOFS_RM_ALL (HYPRLOFS_IOC | 3) +#define HYPRLOFS_GET_ENTRIES (HYPRLOFS_IOC | 4) + +typedef struct { + char *hle_path; + uint_t hle_plen; + char *hle_name; + uint_t hle_nlen; +} hyprlofs_entry_t; + +typedef struct { + hyprlofs_entry_t *hle_entries; + uint_t hle_len; +} hyprlofs_entries_t; + +typedef struct { + char hce_path[MAXPATHLEN]; + char hce_name[MAXPATHLEN]; +} hyprlofs_curr_entry_t; + +typedef struct { + hyprlofs_curr_entry_t *hce_entries; + uint_t hce_cnt; +} hyprlofs_curr_entries_t; + +#ifdef _KERNEL +typedef struct { + caddr32_t hle_path; + uint_t hle_plen; + caddr32_t hle_name; + uint_t hle_nlen; +} hyprlofs_entry32_t; + +typedef struct { + caddr32_t hle_entries; + uint_t hle_len; +} hyprlofs_entries32_t; + +typedef struct { + caddr32_t hce_entries; + uint_t hce_cnt; +} hyprlofs_curr_entries32_t; + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_HYPRLOFS_H */ diff --git a/usr/src/uts/common/sys/fs/hyprlofs_info.h b/usr/src/uts/common/sys/fs/hyprlofs_info.h new file mode 100644 index 0000000000..38389f77d9 --- /dev/null +++ b/usr/src/uts/common/sys/fs/hyprlofs_info.h @@ -0,0 +1,174 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_FS_HYPRLOFS_INFO_H +#define _SYS_FS_HYPRLOFS_INFO_H + +#include <sys/t_lock.h> +#include <vm/seg.h> +#include <vm/seg_vn.h> +#include <sys/vfs_opreg.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * hlnode is the file system dependent node for hyprlofs. + * It is modeled on the tmpfs tmpnode. + * + * hln_rwlock protects access of the directory list at hln_dir + * as well as syncronizing read/writes to directory hlnodes. + * hln_tlock protects updates to hln_mode and hln_nlink. + * hln_tlock doesn't require any hlnode locks. + */ +typedef struct hlnode { + struct hlnode *hln_back; /* linked list of hlnodes */ + struct hlnode *hln_forw; /* linked list of hlnodes */ + union { + struct { + struct hldirent *un_dirlist; /* dirent list */ + uint_t un_dirents; /* number of dirents */ + } un_dirstruct; + vnode_t *un_realvp; /* real vnode */ + } un_hlnode; + vnode_t *hln_vnode; /* vnode for this hlnode */ + int hln_gen; /* pseudo gen num for hlfid */ + int hln_looped; /* flag indicating loopback */ + vattr_t hln_attr; /* attributes */ + krwlock_t hln_rwlock; /* rw - serialize mods and */ + /* directory updates */ + kmutex_t hln_tlock; /* time, flag, and nlink lock */ +} hlnode_t; + +/* + * hyprlofs per-mount data structure. + * All fields are protected by hlm_contents. + */ +typedef struct { + vfs_t *hlm_vfsp; /* filesystem's vfs struct */ + hlnode_t *hlm_rootnode; /* root hlnode */ + char *hlm_mntpath; /* name of hyprlofs mount point */ + dev_t hlm_dev; /* unique dev # of mounted `device' */ + uint_t hlm_gen; /* pseudo generation number for files */ + kmutex_t hlm_contents; /* lock for hlfsmount structure */ +} hlfsmount_t; + +/* + * hyprlofs directories are made up of a linked list of hldirent structures + * hanging off directory hlnodes. File names are not fixed length, + * but are null terminated. + */ +typedef struct hldirent { + hlnode_t *hld_hlnode; /* hlnode for this file */ + struct hldirent *hld_next; /* next directory entry */ + struct hldirent *hld_prev; /* prev directory entry */ + uint_t hld_offset; /* "offset" of dir entry */ + uint_t hld_hash; /* a hash of td_name */ + struct hldirent *hld_link; /* linked via the hash table */ + hlnode_t *hld_parent; /* parent, dir we are in */ + char *hld_name; /* must be null terminated */ + /* max length is MAXNAMELEN */ +} hldirent_t; + +/* + * hlfid overlays the fid structure (for VFS_VGET) + */ +typedef struct { + uint16_t hlfid_len; + ino32_t hlfid_ino; + int32_t hlfid_gen; +} hlfid_t; + +/* + * File system independent to hyprlofs conversion macros + */ +#define VFSTOHLM(vfsp) ((hlfsmount_t *)(vfsp)->vfs_data) +#define VTOHLM(vp) ((hlfsmount_t *)(vp)->v_vfsp->vfs_data) +#define VTOHLN(vp) ((hlnode_t *)(vp)->v_data) +#define HLNTOV(tp) ((tp)->hln_vnode) +#define REALVP(vp) ((vnode_t *)VTOHLN(vp)->hln_realvp) +#define hlnode_hold(tp) VN_HOLD(HLNTOV(tp)) +#define hlnode_rele(tp) VN_RELE(HLNTOV(tp)) + +#define hln_dir un_hlnode.un_dirstruct.un_dirlist +#define hln_dirents un_hlnode.un_dirstruct.un_dirents +#define hln_realvp un_hlnode.un_realvp + +/* + * Attributes + */ +#define hln_mask hln_attr.va_mask +#define hln_type hln_attr.va_type +#define hln_mode hln_attr.va_mode +#define hln_uid hln_attr.va_uid +#define hln_gid hln_attr.va_gid +#define hln_fsid hln_attr.va_fsid +#define hln_nodeid hln_attr.va_nodeid +#define hln_nlink hln_attr.va_nlink +#define hln_size hln_attr.va_size +#define hln_atime hln_attr.va_atime +#define hln_mtime hln_attr.va_mtime +#define hln_ctime hln_attr.va_ctime +#define hln_rdev hln_attr.va_rdev +#define hln_blksize hln_attr.va_blksize +#define hln_nblocks hln_attr.va_nblocks +#define hln_seq hln_attr.va_seq + +/* + * enums + */ +enum de_op { DE_CREATE, DE_MKDIR }; /* direnter ops */ +enum dr_op { DR_REMOVE, DR_RMDIR }; /* dirremove ops */ + +/* + * hyprlofs_minfree is the amount (in pages) of anonymous memory that hyprlofs + * leaves free for the rest of the system. The default value for + * hyprlofs_minfree is btopr(HYPRLOFSMINFREE) but it can be patched to a + * different number of pages. Since hyprlofs doesn't actually use much + * memory, its unlikely this ever needs to be patched. + */ +#define HYPRLOFSMINFREE 8 * 1024 * 1024 /* 8 Megabytes */ + +extern size_t hyprlofs_minfree; /* Anonymous memory in pages */ + +extern void hyprlofs_node_init(hlfsmount_t *, hlnode_t *, vattr_t *, + cred_t *); +extern int hyprlofs_dirlookup(hlnode_t *, char *, hlnode_t **, cred_t *); +extern int hyprlofs_dirdelete(hlnode_t *, hlnode_t *, char *, enum dr_op, + cred_t *); +extern void hyprlofs_dirinit(hlnode_t *, hlnode_t *); +extern void hyprlofs_dirtrunc(hlnode_t *); +extern int hyprlofs_taccess(void *, int, cred_t *); +extern int hyprlofs_direnter(hlfsmount_t *, hlnode_t *, char *, enum de_op, + vnode_t *, vattr_t *, hlnode_t **, cred_t *); + +extern struct vnodeops *hyprlofs_vnodeops; +extern const struct fs_operation_def hyprlofs_vnodeops_template[]; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_HYPRLOFS_INFO_H */ diff --git a/usr/src/uts/common/sys/fs/sdev_impl.h b/usr/src/uts/common/sys/fs/sdev_impl.h index 7ea6b88ec2..240a53b037 100644 --- a/usr/src/uts/common/sys/fs/sdev_impl.h +++ b/usr/src/uts/common/sys/fs/sdev_impl.h @@ -36,6 +36,7 @@ extern "C" { #include <sys/vfs_opreg.h> #include <sys/list.h> #include <sys/nvpair.h> +#include <sys/fs/sdev_plugin.h> #include <sys/sunddi.h> /* @@ -128,6 +129,21 @@ typedef struct sdev_local_data { struct sdev_dprof sdev_lprof; /* profile for multi-inst */ } sdev_local_data_t; +/* sdev_flags */ +typedef enum sdev_flags { + SDEV_BUILD = 0x0001, /* directory cache out-of-date */ + SDEV_GLOBAL = 0x0002, /* global /dev nodes */ + SDEV_PERSIST = 0x0004, /* backing store persisted node */ + SDEV_NO_NCACHE = 0x0008, /* do not include in neg. cache */ + SDEV_DYNAMIC = 0x0010, /* special-purpose vnode ops */ + /* (ex: pts) */ + SDEV_VTOR = 0x0020, /* validate sdev_nodes during search */ + SDEV_ATTR_INVALID = 0x0040, /* invalid node attributes, */ + /* need update */ + SDEV_SUBDIR = 0x0080, /* match all subdirs under here */ + SDEV_ZONED = 0x0100 /* zoned subdir */ +} sdev_flags_t; + /* * /dev filesystem sdev_node defines */ @@ -150,7 +166,7 @@ typedef struct sdev_node { ino64_t sdev_ino; /* inode */ uint_t sdev_nlink; /* link count */ int sdev_state; /* state of this node */ - int sdev_flags; /* flags bit */ + sdev_flags_t sdev_flags; /* flags bit */ kmutex_t sdev_lookup_lock; /* node creation synch lock */ kcondvar_t sdev_lookup_cv; /* node creation sync cv */ @@ -161,7 +177,7 @@ typedef struct sdev_node { struct sdev_global_data sdev_globaldata; struct sdev_local_data sdev_localdata; } sdev_instance_data; - + list_node_t sdev_plist; /* link on plugin list */ void *sdev_private; } sdev_node_t; @@ -192,29 +208,11 @@ typedef enum { SDEV_READY } sdev_node_state_t; -/* sdev_flags */ -#define SDEV_BUILD 0x0001 /* directory cache out-of-date */ -#define SDEV_GLOBAL 0x0002 /* global /dev nodes */ -#define SDEV_PERSIST 0x0004 /* backing store persisted node */ -#define SDEV_NO_NCACHE 0x0008 /* do not include in neg. cache */ -#define SDEV_DYNAMIC 0x0010 /* special-purpose vnode ops */ - /* (ex: pts) */ -#define SDEV_VTOR 0x0020 /* validate sdev_nodes during search */ -#define SDEV_ATTR_INVALID 0x0040 /* invalid node attributes, */ - /* need update */ -#define SDEV_SUBDIR 0x0080 /* match all subdirs under here */ -#define SDEV_ZONED 0x0100 /* zoned subdir */ - /* sdev_lookup_flags */ #define SDEV_LOOKUP 0x0001 /* node creation in progress */ #define SDEV_READDIR 0x0002 /* VDIR readdir in progress */ #define SDEV_LGWAITING 0x0004 /* waiting for devfsadm completion */ -#define SDEV_VTOR_INVALID -1 -#define SDEV_VTOR_SKIP 0 -#define SDEV_VTOR_VALID 1 -#define SDEV_VTOR_STALE 2 - /* convenient macros */ #define SDEV_IS_GLOBAL(dv) \ (dv->sdev_flags & SDEV_GLOBAL) @@ -366,8 +364,13 @@ extern void sdev_devfsadmd_thread(struct sdev_node *, struct sdev_node *, extern int devname_profile_update(char *, size_t); extern struct sdev_data *sdev_find_mntinfo(char *); void sdev_mntinfo_rele(struct sdev_data *); +typedef void (*sdev_mnt_walk_f)(struct sdev_node *, void *); +void sdev_mnt_walk(sdev_mnt_walk_f, void *); extern struct vnodeops *devpts_getvnodeops(void); extern struct vnodeops *devvt_getvnodeops(void); +extern void sdev_plugin_nodeready(struct sdev_node *); +extern int sdev_plugin_init(void); +extern int sdev_plugin_fini(void); /* * boot states - warning, the ordering here is significant @@ -512,6 +515,23 @@ extern void sdev_nc_path_exists(sdev_nc_list_t *, char *); extern void sdev_modctl_dump_files(void); /* + * plugin and legacy vtab stuff + */ +/* directory dependent vop table */ +typedef struct sdev_vop_table { + char *vt_name; /* subdirectory name */ + const fs_operation_def_t *vt_service; /* vnodeops table */ + struct vnodeops **vt_global_vops; /* global container for vop */ + int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */ + int vt_flags; +} sdev_vop_table_t; + +extern struct sdev_vop_table vtab[]; +extern struct vnodeops *sdev_get_vop(struct sdev_node *); +extern void sdev_set_no_negcache(struct sdev_node *); +extern void *sdev_get_vtor(struct sdev_node *dv); + +/* * globals */ extern kmutex_t sdev_lock; @@ -524,6 +544,7 @@ extern struct vnodeops *devipnet_vnodeops; extern struct vnodeops *devvt_vnodeops; extern struct sdev_data *sdev_origins; /* mount info for global /dev instance */ extern struct vnodeops *devzvol_vnodeops; +extern int sdev_vnodeops_tbl_size; extern const fs_operation_def_t sdev_vnodeops_tbl[]; extern const fs_operation_def_t devpts_vnodeops_tbl[]; diff --git a/usr/src/uts/common/sys/fs/sdev_plugin.h b/usr/src/uts/common/sys/fs/sdev_plugin.h new file mode 100644 index 0000000000..8783df58e6 --- /dev/null +++ b/usr/src/uts/common/sys/fs/sdev_plugin.h @@ -0,0 +1,106 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_SDEV_PLUGIN_H +#define _SYS_SDEV_PLUGIN_H + +/* + * Kernel sdev plugin interface + */ + +#ifdef _KERNEL + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/vnode.h> + +#endif /* _KERNEL */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +typedef uintptr_t sdev_plugin_hdl_t; +typedef uintptr_t sdev_ctx_t; + +/* + * Valid return values for sdev_plugin_validate_t. + */ +typedef enum sdev_plugin_validate { + SDEV_VTOR_INVALID = -1, + SDEV_VTOR_SKIP = 0, + SDEV_VTOR_VALID = 1, + SDEV_VTOR_STALE = 2 +} sdev_plugin_validate_t; + +/* + * Valid flags + */ +typedef enum sdev_plugin_flags { + SDEV_PLUGIN_NO_NCACHE = 0x1, + SDEV_PLUGIN_SUBDIR = 0x2 +} sdev_plugin_flags_t; + +#define SDEV_PLUGIN_FLAGS_MASK 0x3 + +/* + * Functions a module must implement + */ +typedef sdev_plugin_validate_t (*sp_valid_f)(sdev_ctx_t); +typedef int (*sp_filldir_f)(sdev_ctx_t); +typedef void (*sp_inactive_f)(sdev_ctx_t); + +#define SDEV_PLUGIN_VERSION 1 + +typedef struct sdev_plugin_ops { + int spo_version; + sdev_plugin_flags_t spo_flags; + sp_valid_f spo_validate; + sp_filldir_f spo_filldir; + sp_inactive_f spo_inactive; +} sdev_plugin_ops_t; + +extern sdev_plugin_hdl_t sdev_plugin_register(const char *, sdev_plugin_ops_t *, + int *); +extern int sdev_plugin_unregister(sdev_plugin_hdl_t); + +typedef enum sdev_ctx_flags { + SDEV_CTX_GLOBAL = 0x2 /* node belongs to the GZ */ +} sdev_ctx_flags_t; + +/* + * Context helper functions + */ +extern sdev_ctx_flags_t sdev_ctx_flags(sdev_ctx_t); +extern const char *sdev_ctx_name(sdev_ctx_t); +extern const char *sdev_ctx_path(sdev_ctx_t); +extern enum vtype sdev_ctx_vtype(sdev_ctx_t); +extern const void *sdev_ctx_vtype_data(sdev_ctx_t); + +/* + * Callbacks to manipulate nodes + */ +extern int sdev_plugin_mkdir(sdev_ctx_t, char *); +extern int sdev_plugin_mknod(sdev_ctx_t, char *, mode_t, dev_t); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SDEV_PLUGIN_H */ diff --git a/usr/src/uts/common/sys/fs/tmp.h b/usr/src/uts/common/sys/fs/tmp.h index 68dd67c61e..f8740e8873 100644 --- a/usr/src/uts/common/sys/fs/tmp.h +++ b/usr/src/uts/common/sys/fs/tmp.h @@ -22,12 +22,13 @@ * Copyright 2007 Sun Microsystems, Inc. * All rights reserved. Use is subject to license terms. */ +/* + * Copyright 2015 Joyent, Inc. + */ #ifndef _SYS_FS_TMP_H #define _SYS_FS_TMP_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -68,29 +69,28 @@ enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME }; /* dirremove ops */ /* * tmpfs_minfree is the amount (in pages) of anonymous memory that tmpfs - * leaves free for the rest of the system. E.g. in a system with 32MB of - * configured swap space, if 16MB were reserved (leaving 16MB free), - * tmpfs could allocate up to 16MB - tmpfs_minfree. The default value - * for tmpfs_minfree is btopr(TMPMINFREE) but it can cautiously patched - * to a different number of pages. - * NB: If tmpfs allocates too much swap space, other processes will be - * unable to execute. + * leaves free for the rest of the system. In antiquity, this number could be + * relevant on a system-wide basis, as physical DRAM was routinely exhausted; + * however, in more modern times, the relative growth of DRAM with respect to + * application footprint means that this number is only likely to become + * factor in a virtualized OS environment (e.g., a zone) -- and even then only + * when DRAM and swap have both been capped low to allow for maximum tenancy. + * TMPMINFREE -- the value from which tmpfs_minfree is derived -- should + * therefore be configured to a value that is roughly the smallest practical + * value for memory + swap minus the largest reasonable size for tmpfs in such + * a configuration. As of this writing, the smallest practical memory + swap + * configuration is 128MB, and it seems reasonable to allow tmpfs to consume + * no more than seven-eighths of this, yielding a TMPMINFREE of 16MB. Care + * should be exercised in changing this: tuning this value too high will + * result in spurious ENOSPC errors in tmpfs in small zones (a problem that + * can induce cascading failure surprisingly often); tuning this value too low + * will result in tmpfs consumption alone to alone induce application-level + * memory allocation failure. */ -#define TMPMINFREE 2 * 1024 * 1024 /* 2 Megabytes */ +#define TMPMINFREE 16 * 1024 * 1024 /* 16 Megabytes */ extern size_t tmpfs_minfree; /* Anonymous memory in pages */ -/* - * tmpfs can allocate only a certain percentage of kernel memory, - * which is used for tmpnodes, directories, file names, etc. - * This is statically set as TMPMAXFRACKMEM of physical memory. - * The actual number of allocatable bytes can be patched in tmpfs_maxkmem. - */ -#define TMPMAXFRACKMEM 25 /* 1/25 of physical memory */ - -extern size_t tmp_kmemspace; -extern size_t tmpfs_maxkmem; /* Allocatable kernel memory in bytes */ - extern void tmpnode_init(struct tmount *, struct tmpnode *, struct vattr *, struct cred *); extern int tmpnode_trunc(struct tmount *, struct tmpnode *, ulong_t); @@ -101,13 +101,12 @@ extern int tdirdelete(struct tmpnode *, struct tmpnode *, char *, enum dr_op, struct cred *); extern void tdirinit(struct tmpnode *, struct tmpnode *); extern void tdirtrunc(struct tmpnode *); -extern void *tmp_memalloc(size_t, int); -extern void tmp_memfree(void *, size_t); extern int tmp_resv(struct tmount *, struct tmpnode *, size_t, int); extern int tmp_taccess(void *, int, struct cred *); extern int tmp_sticky_remove_access(struct tmpnode *, struct tmpnode *, struct cred *); extern int tmp_convnum(char *, pgcnt_t *); +extern int tmp_convmode(char *, mode_t *); extern int tdirenter(struct tmount *, struct tmpnode *, char *, enum de_op, struct tmpnode *, struct tmpnode *, struct vattr *, struct tmpnode **, struct cred *, caller_context_t *); diff --git a/usr/src/uts/common/sys/fx.h b/usr/src/uts/common/sys/fx.h index 2d4e1aa7fb..4a48af52a1 100644 --- a/usr/src/uts/common/sys/fx.h +++ b/usr/src/uts/common/sys/fx.h @@ -21,13 +21,12 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_FX_H #define _SYS_FX_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/types.h> #include <sys/thread.h> #include <sys/ddi.h> @@ -145,7 +144,14 @@ typedef struct fxkparms { uint_t fx_cflags; } fxkparms_t; +/* + * control flags (kparms->fx_cflags). + */ +#define FX_DOUPRILIM 0x01 /* change user priority limit */ +#define FX_DOUPRI 0x02 /* change user priority */ +#define FX_DOTQ 0x04 /* change FX time quantum */ +#define FXMAXUPRI 60 /* maximum user priority setting */ /* * Interface for partner private code. This is not a public interface. diff --git a/usr/src/uts/common/sys/gsqueue.h b/usr/src/uts/common/sys/gsqueue.h new file mode 100644 index 0000000000..40ef4ce982 --- /dev/null +++ b/usr/src/uts/common/sys/gsqueue.h @@ -0,0 +1,65 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_GSQUEUE_H +#define _SYS_GSQUEUE_H + +/* + * Standard interfaces to serializaion queues for everyone (except IP). + */ + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _KERNEL + +typedef struct gsqueue gsqueue_t; +typedef struct gsqueue_set gsqueue_set_t; + +typedef void (*gsqueue_cb_f)(gsqueue_set_t *, gsqueue_t *, void *, boolean_t); +typedef void (*gsqueue_proc_f)(void *, mblk_t *, gsqueue_t *, void *); + +extern gsqueue_set_t *gsqueue_set_create(uint_t, pri_t); +extern void gsqueue_set_destroy(gsqueue_set_t *); +extern gsqueue_t *gsqueue_set_get(gsqueue_set_t *, uint_t); + +extern uintptr_t gsqueue_set_cb_add(gsqueue_set_t *, gsqueue_cb_f, void *); +extern int gsqueue_set_cb_remove(gsqueue_set_t *, uintptr_t); + +#define GSQUEUE_FILL 0x0001 +#define GSQUEUE_NODRAIN 0x0002 +#define GSQUEUE_PROCESS 0x0004 + +extern void gsqueue_enter_one(gsqueue_t *, mblk_t *, gsqueue_proc_f, void *, + int, uint8_t); + +/* + * The default wait is inherited from IP. This determines the amount of time + * that must pass after queuing work, before we wake up the worker thread. This + * value is in milliseconds. + */ +#define GSQUEUE_DEFAULT_WAIT 10 +#define GSQUEUE_DEFAULT_PRIORITY MAXCLSYSPRI + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_GSQUEUE_H */ diff --git a/usr/src/uts/common/sys/id_space.h b/usr/src/uts/common/sys/id_space.h index d56fcceb5a..46d25f207f 100644 --- a/usr/src/uts/common/sys/id_space.h +++ b/usr/src/uts/common/sys/id_space.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Joyent, Inc. All Rights reserved. */ #ifndef _ID_SPACE_H @@ -34,8 +35,6 @@ extern "C" { #include <sys/mutex.h> #include <sys/vmem.h> -#ifdef _KERNEL - typedef vmem_t id_space_t; id_space_t *id_space_create(const char *, id_t, id_t); @@ -48,8 +47,6 @@ id_t id_allocff_nosleep(id_space_t *); id_t id_alloc_specific_nosleep(id_space_t *, id_t); void id_free(id_space_t *, id_t); -#endif /* _KERNEL */ - #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/inotify.h b/usr/src/uts/common/sys/inotify.h new file mode 100644 index 0000000000..8acc1a7280 --- /dev/null +++ b/usr/src/uts/common/sys/inotify.h @@ -0,0 +1,153 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +/* + * Header file to support for the inotify facility. Note that this facility + * is designed to be binary compatible with the Linux inotify facility; values + * for constants here should therefore exactly match those found in Linux, and + * this facility shouldn't be extended independently of Linux. + */ + +#ifndef _SYS_INOTIFY_H +#define _SYS_INOTIFY_H + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Events that can be explicitly requested on any inotify watch. + */ +#define IN_ACCESS 0x00000001 +#define IN_MODIFY 0x00000002 +#define IN_ATTRIB 0x00000004 +#define IN_CLOSE_WRITE 0x00000008 +#define IN_CLOSE_NOWRITE 0x00000010 +#define IN_OPEN 0x00000020 +#define IN_MOVED_FROM 0x00000040 +#define IN_MOVED_TO 0x00000080 +#define IN_CREATE 0x00000100 +#define IN_DELETE 0x00000200 +#define IN_DELETE_SELF 0x00000400 +#define IN_MOVE_SELF 0x00000800 + +/* + * Events that can be sent to an inotify watch -- requested or not. + */ +#define IN_UNMOUNT 0x00002000 +#define IN_Q_OVERFLOW 0x00004000 +#define IN_IGNORED 0x00008000 + +/* + * Flags that can modify an inotify event. + */ +#define IN_ONLYDIR 0x01000000 +#define IN_DONT_FOLLOW 0x02000000 +#define IN_EXCL_UNLINK 0x04000000 +#define IN_MASK_ADD 0x20000000 +#define IN_ISDIR 0x40000000 +#define IN_ONESHOT 0x80000000 + +/* + * Helpful constants. + */ +#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE) +#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) +#define IN_ALL_EVENTS \ + (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ + IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | IN_MOVED_TO | \ + IN_DELETE | IN_CREATE | IN_DELETE_SELF | IN_MOVE_SELF) + +#define IN_CHILD_EVENTS \ + (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ + IN_CLOSE_NOWRITE | IN_MODIFY | IN_OPEN) + +/* + * To assure binary compatibility with Linux, these values are fixed at their + * Linux equivalents, not their native ones. + */ +#define IN_CLOEXEC 02000000 /* LX_O_CLOEXEC */ +#define IN_NONBLOCK 04000 /* LX_O_NONBLOCK */ + +struct inotify_event { + int32_t wd; /* watch descriptor */ + uint32_t mask; /* mask of events */ + uint32_t cookie; /* event association cookie, if any */ + uint32_t len; /* size of name field */ + char name[]; /* optional NUL-terminated name */ +}; + +/* + * These ioctl values are specific to the native implementation; applications + * shouldn't be using them directly, and they should therefore be safe to + * change without breaking apps. + */ +#define INOTIFYIOC (('i' << 24) | ('n' << 16) | ('y' << 8)) +#define INOTIFYIOC_ADD_WATCH (INOTIFYIOC | 1) /* add watch */ +#define INOTIFYIOC_RM_WATCH (INOTIFYIOC | 2) /* remove watch */ +#define INOTIFYIOC_ADD_CHILD (INOTIFYIOC | 3) /* add child watch */ +#define INOTIFYIOC_ACTIVATE (INOTIFYIOC | 4) /* activate watch */ + +#ifndef _LP64 +#ifndef _LITTLE_ENDIAN +#define INOTIFY_PTR(type, name) uint32_t name##pad; type *name +#else +#define INOTIFY_PTR(type, name) type *name; uint32_t name##pad +#endif +#else +#define INOTIFY_PTR(type, name) type *name +#endif + +typedef struct inotify_addwatch { + int inaw_fd; /* open fd for object */ + uint32_t inaw_mask; /* desired mask */ +} inotify_addwatch_t; + +typedef struct inotify_addchild { + INOTIFY_PTR(char, inac_name); /* pointer to name */ + int inac_fd; /* open fd for parent */ +} inotify_addchild_t; + +#ifndef _KERNEL + +extern int inotify_init(void); +extern int inotify_init1(int); +extern int inotify_add_watch(int, const char *, uint32_t); +extern int inotify_rm_watch(int, int); + +#else + +#define IN_UNMASKABLE \ + (IN_UNMOUNT | IN_Q_OVERFLOW | IN_IGNORED | IN_ISDIR) + +#define IN_MODIFIERS \ + (IN_EXCL_UNLINK | IN_ONESHOT) + +#define IN_FLAGS \ + (IN_ONLYDIR | IN_DONT_FOLLOW | IN_MASK_ADD) + +#define IN_REMOVAL (1ULL << 32) +#define INOTIFYMNRN_INOTIFY 0 +#define INOTIFYMNRN_CLONE 1 + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_INOTIFY_H */ diff --git a/usr/src/uts/common/sys/iso/signal_iso.h b/usr/src/uts/common/sys/iso/signal_iso.h index b1990121b8..0ae64b45d7 100644 --- a/usr/src/uts/common/sys/iso/signal_iso.h +++ b/usr/src/uts/common/sys/iso/signal_iso.h @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -95,7 +96,7 @@ extern "C" { /* insert new signals here, and move _SIGRTM* appropriately */ #define _SIGRTMIN 42 /* first (highest-priority) realtime signal */ -#define _SIGRTMAX 73 /* last (lowest-priority) realtime signal */ +#define _SIGRTMAX 74 /* last (lowest-priority) realtime signal */ extern long _sysconf(int); /* System Private interface to sysconf() */ #define SIGRTMIN ((int)_sysconf(_SC_SIGRT_MIN)) /* first realtime signal */ #define SIGRTMAX ((int)_sysconf(_SC_SIGRT_MAX)) /* last realtime signal */ diff --git a/usr/src/uts/common/sys/klwp.h b/usr/src/uts/common/sys/klwp.h index 41b70f6a6e..2aceb3a0f6 100644 --- a/usr/src/uts/common/sys/klwp.h +++ b/usr/src/uts/common/sys/klwp.h @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_KLWP_H @@ -192,6 +192,8 @@ typedef struct _klwp { struct contract *lwp_ct_latest[CTT_MAXTYPE]; /* last created contract */ void *lwp_brand; /* per-lwp brand data */ + int (*lwp_brand_syscall)(void); /* brand syscall interposer */ + struct psinfo *lwp_spymaster; /* if an agent LWP, our spymaster */ } klwp_t; diff --git a/usr/src/uts/common/sys/ksocket.h b/usr/src/uts/common/sys/ksocket.h index dfe25eec76..be669cb78d 100644 --- a/usr/src/uts/common/sys/ksocket.h +++ b/usr/src/uts/common/sys/ksocket.h @@ -21,6 +21,7 @@ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_KSOCKET_H_ @@ -121,6 +122,10 @@ extern int ksocket_close(ksocket_t, struct cred *); extern void ksocket_hold(ksocket_t); extern void ksocket_rele(ksocket_t); +typedef boolean_t (*ksocket_krecv_f)(ksocket_t, mblk_t *, size_t, int, void *); +extern int ksocket_krecv_set(ksocket_t, ksocket_krecv_f, void *); +extern void ksocket_krecv_unblock(ksocket_t); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/limits.h b/usr/src/uts/common/sys/limits.h new file mode 100644 index 0000000000..88625d1829 --- /dev/null +++ b/usr/src/uts/common/sys/limits.h @@ -0,0 +1,32 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ +/* + * Copyright 2015 Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_LIMITS_H +#define _SYS_LIMITS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define IOV_MAX 1024 + +#ifdef _KERNEL +#define IOV_MAX_STACK 16 /* max. IOV on-stack allocation */ +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_LIMITS_H */ diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index 247c3bd48d..cdbbe4ce62 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. * Copyright (c) 2015 Garrett D'Amore <garrett@damore.org> */ @@ -101,6 +101,14 @@ typedef struct mac_propval_uint32_range_s { } mac_propval_uint32_range_t; /* + * Defines ranges which are a series of C style strings. + */ +typedef struct mac_propval_str_range_s { + uint32_t mpur_nextbyte; + char mpur_data[1]; +} mac_propval_str_range_t; + +/* * Data type of property values. */ typedef enum { @@ -120,6 +128,7 @@ typedef struct mac_propval_range_s { mac_propval_type_t mpr_type; /* type of value */ union { mac_propval_uint32_range_t mpr_uint32[1]; + mac_propval_str_range_t mpr_str; } u; } mac_propval_range_t; @@ -214,6 +223,7 @@ typedef enum { MAC_PROP_MAX_RXHWCLNT_AVAIL, MAC_PROP_MAX_TXHWCLNT_AVAIL, MAC_PROP_IB_LINKMODE, + MAC_PROP_VN_PROMISC_FILTERED, MAC_PROP_SECONDARY_ADDRS, MAC_PROP_ADV_40GFDX_CAP, MAC_PROP_EN_40GFDX_CAP, diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index 3c89b7c434..f599e1fcfe 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -328,6 +328,7 @@ extern int mac_tx_percpu_cnt; /* Mac protection flags */ #define MPT_FLAG_V6_LOCAL_ADDR_SET 0x0001 +#define MPT_FLAG_PROMISC_FILTERED 0x0002 /* in mac_client.c */ extern void mac_promisc_client_dispatch(mac_client_impl_t *, mblk_t *); diff --git a/usr/src/uts/common/sys/mac_client_priv.h b/usr/src/uts/common/sys/mac_client_priv.h index 6b409513a6..a5848625c2 100644 --- a/usr/src/uts/common/sys/mac_client_priv.h +++ b/usr/src/uts/common/sys/mac_client_priv.h @@ -22,7 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2013 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ /* @@ -171,6 +171,7 @@ extern void mac_client_set_intr_cpu(void *, mac_client_handle_t, int32_t); extern void *mac_get_devinfo(mac_handle_t); extern boolean_t mac_is_vnic(mac_handle_t); +extern boolean_t mac_is_overlay(mac_handle_t); extern uint32_t mac_no_notification(mac_handle_t); extern int mac_set_prop(mac_handle_t, mac_prop_id_t, char *, void *, uint_t); diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index 3c9c6b77a4..d72558c612 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_MAC_IMPL_H @@ -643,6 +643,7 @@ struct mac_impl_s { #define MIS_LEGACY 0x0040 #define MIS_NO_ACTIVE 0x0080 #define MIS_POLL_DISABLE 0x0100 +#define MIS_IS_OVERLAY 0x0200 #define mi_getstat mi_callbacks->mc_getstat #define mi_start mi_callbacks->mc_start @@ -894,6 +895,8 @@ extern void mac_protect_fini(mac_client_impl_t *); extern int mac_set_resources(mac_handle_t, mac_resource_props_t *); extern void mac_get_resources(mac_handle_t, mac_resource_props_t *); extern void mac_get_effective_resources(mac_handle_t, mac_resource_props_t *); +extern void mac_set_promisc_filtered(mac_client_handle_t, boolean_t); +extern boolean_t mac_get_promisc_filtered(mac_client_handle_t); extern cpupart_t *mac_pset_find(mac_resource_props_t *, boolean_t *); extern void mac_set_pool_effective(boolean_t, cpupart_t *, diff --git a/usr/src/uts/common/sys/mac_provider.h b/usr/src/uts/common/sys/mac_provider.h index 9f7f2a1a73..5f02451542 100644 --- a/usr/src/uts/common/sys/mac_provider.h +++ b/usr/src/uts/common/sys/mac_provider.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_MAC_PROVIDER_H @@ -106,7 +107,8 @@ typedef enum { MAC_CAPAB_NO_NATIVEVLAN = 0x00080000, /* boolean only, no data */ MAC_CAPAB_NO_ZCOPY = 0x00100000, /* boolean only, no data */ MAC_CAPAB_LEGACY = 0x00200000, /* data is mac_capab_legacy_t */ - MAC_CAPAB_VRRP = 0x00400000 /* data is mac_capab_vrrp_t */ + MAC_CAPAB_VRRP = 0x00400000, /* data is mac_capab_vrrp_t */ + MAC_CAPAB_OVERLAY = 0x00800000 /* boolean only, no data */ } mac_capab_t; /* diff --git a/usr/src/uts/common/sys/mem.h b/usr/src/uts/common/sys/mem.h index d0ac1223fc..5101b3ec17 100644 --- a/usr/src/uts/common/sys/mem.h +++ b/usr/src/uts/common/sys/mem.h @@ -23,6 +23,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2015, Joyent, Inc. All rights reserved. + */ + #ifndef _SYS_MEM_H #define _SYS_MEM_H @@ -128,6 +132,15 @@ extern pfn_t impl_obmem_pfnum(pfn_t); extern int plat_mem_do_mmio(struct uio *, enum uio_rw); +typedef struct mm_logentry { + uintptr_t mle_vaddr; /* vaddr being written to */ + size_t mle_len; /* length of write */ + timespec_t mle_hrestime; /* hrestime at time of write */ + hrtime_t mle_hrtime; /* hrtime at time of write */ + pid_t mle_pid; /* pid of writing process */ + char mle_psargs[80]; /* psargs of writing process */ +} mm_logentry_t; + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h index 4976bf09ab..34e491fd3b 100644 --- a/usr/src/uts/common/sys/mman.h +++ b/usr/src/uts/common/sys/mman.h @@ -318,6 +318,7 @@ struct memcntl_mha32 { #define MADV_ACCESS_DEFAULT 6 /* default access */ #define MADV_ACCESS_LWP 7 /* next LWP to access heavily */ #define MADV_ACCESS_MANY 8 /* many processes to access heavily */ +#define MADV_PURGE 9 /* contents will be purged */ #endif /* (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) ... */ @@ -337,6 +338,7 @@ struct memcntl_mha32 { #define MS_SYNC 0x4 /* wait for msync */ #define MS_ASYNC 0x1 /* return immediately */ #define MS_INVALIDATE 0x2 /* invalidate caches */ +#define MS_INVALCURPROC 0x8 /* invalidate cache for curproc only */ #if (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) || defined(__EXTENSIONS__) /* functions to mctl */ diff --git a/usr/src/uts/common/sys/mntent.h b/usr/src/uts/common/sys/mntent.h index 88c98dc5a4..7196f7b3ac 100644 --- a/usr/src/uts/common/sys/mntent.h +++ b/usr/src/uts/common/sys/mntent.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012, Joyent, Inc. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T @@ -47,6 +48,7 @@ extern "C" { #define MNTTYPE_PCFS "pcfs" /* PC (MSDOS) file system */ #define MNTTYPE_PC MNTTYPE_PCFS /* Deprecated name; use MNTTYPE_PCFS */ #define MNTTYPE_LOFS "lofs" /* Loop back file system */ +#define MNTTYPE_HYPRLOFS "hyprlofs" /* Hyperlofs file system */ #define MNTTYPE_LO MNTTYPE_LOFS /* Deprecated name; use MNTTYPE_LOFS */ #define MNTTYPE_HSFS "hsfs" /* High Sierra (9660) file system */ #define MNTTYPE_SWAP "swap" /* Swap file system */ diff --git a/usr/src/uts/common/sys/netconfig.h b/usr/src/uts/common/sys/netconfig.h index 14b1aa55db..883c329aed 100644 --- a/usr/src/uts/common/sys/netconfig.h +++ b/usr/src/uts/common/sys/netconfig.h @@ -28,6 +28,7 @@ * * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_NETCONFIG_H @@ -147,6 +148,8 @@ extern int endnetpath(void *); extern struct netconfig *getnetpath(void *); extern void nc_perror(const char *); extern char *nc_sperror(void); +extern void _nsl_brand_set_hooks(int (*)(void), + struct netconfig *(*)(int)); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/neti.h b/usr/src/uts/common/sys/neti.h index 93b5fc3e01..ea85c78f6b 100644 --- a/usr/src/uts/common/sys/neti.h +++ b/usr/src/uts/common/sys/neti.h @@ -44,6 +44,8 @@ extern "C" { #define NHF_INET "NHF_INET" #define NHF_INET6 "NHF_INET6" #define NHF_ARP "NHF_ARP" +#define NHF_VND_INET "NHF_VND_INET" +#define NHF_VND_INET6 "NHF_VND_INET6" /* * Event identification diff --git a/usr/src/uts/common/sys/netstack.h b/usr/src/uts/common/sys/netstack.h index 2c77e1be96..73f29d1e63 100644 --- a/usr/src/uts/common/sys/netstack.h +++ b/usr/src/uts/common/sys/netstack.h @@ -81,7 +81,8 @@ typedef id_t netstackid_t; #define NS_IPSECESP 16 #define NS_IPNET 17 #define NS_ILB 18 -#define NS_MAX (NS_ILB+1) +#define NS_VND 19 +#define NS_MAX (NS_VND+1) /* * State maintained for each module which tracks the state of diff --git a/usr/src/uts/common/sys/overlay.h b/usr/src/uts/common/sys/overlay.h new file mode 100644 index 0000000000..12d0dbca51 --- /dev/null +++ b/usr/src/uts/common/sys/overlay.h @@ -0,0 +1,96 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015, Joyent, Inc. + */ + +#ifndef _SYS_OVERLAY_H +#define _SYS_OVERLAY_H + +/* + * Overlay device support + */ + +#include <sys/param.h> +#include <sys/dld_ioc.h> +#include <sys/mac.h> +#include <sys/overlay_common.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define OVERLAY_IOC_CREATE OVERLAYIOC(1) +#define OVERLAY_IOC_DELETE OVERLAYIOC(2) +#define OVERLAY_IOC_PROPINFO OVERLAYIOC(3) +#define OVERLAY_IOC_GETPROP OVERLAYIOC(4) +#define OVERLAY_IOC_SETPROP OVERLAYIOC(5) +#define OVERLAY_IOC_NPROPS OVERLAYIOC(6) +#define OVERLAY_IOC_ACTIVATE OVERLAYIOC(7) +#define OVERLAY_IOC_STATUS OVERLAYIOC(8) + +typedef struct overlay_ioc_create { + datalink_id_t oic_linkid; + uint32_t oic_filler; + uint64_t oic_vnetid; + char oic_encap[MAXLINKNAMELEN]; +} overlay_ioc_create_t; + +typedef struct overlay_ioc_activate { + datalink_id_t oia_linkid; +} overlay_ioc_activate_t; + +typedef struct overlay_ioc_delete { + datalink_id_t oid_linkid; +} overlay_ioc_delete_t; + +typedef struct overlay_ioc_nprops { + datalink_id_t oipn_linkid; + int32_t oipn_nprops; +} overlay_ioc_nprops_t; + +typedef struct overlay_ioc_propinfo { + datalink_id_t oipi_linkid; + int32_t oipi_id; + char oipi_name[OVERLAY_PROP_NAMELEN]; + uint_t oipi_type; + uint_t oipi_prot; + uint8_t oipi_default[OVERLAY_PROP_SIZEMAX]; + uint32_t oipi_defsize; + uint32_t oipi_posssize; + uint8_t oipi_poss[OVERLAY_PROP_SIZEMAX]; +} overlay_ioc_propinfo_t; + +typedef struct overlay_ioc_prop { + datalink_id_t oip_linkid; + int32_t oip_id; + char oip_name[OVERLAY_PROP_NAMELEN]; + uint8_t oip_value[OVERLAY_PROP_SIZEMAX]; + uint32_t oip_size; +} overlay_ioc_prop_t; + +typedef enum overlay_status { + OVERLAY_I_OK = 0x00, + OVERLAY_I_DEGRADED = 0x01 +} overlay_status_t; + +typedef struct overlay_ioc_status { + datalink_id_t ois_linkid; + uint_t ois_status; + char ois_message[OVERLAY_STATUS_BUFLEN]; +} overlay_ioc_status_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_OVERLAY_H */ diff --git a/usr/src/uts/common/sys/overlay_common.h b/usr/src/uts/common/sys/overlay_common.h new file mode 100644 index 0000000000..d638096006 --- /dev/null +++ b/usr/src/uts/common/sys/overlay_common.h @@ -0,0 +1,65 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _SYS_OVERLAY_COMMON_H +#define _SYS_OVERLAY_COMMON_H + +/* + * Common overlay definitions + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum overlay_target_mode { + OVERLAY_TARGET_NONE = 0x0, + OVERLAY_TARGET_POINT, + OVERLAY_TARGET_DYNAMIC +} overlay_target_mode_t; + +typedef enum overlay_plugin_dest { + OVERLAY_PLUGIN_D_INVALID = 0x0, + OVERLAY_PLUGIN_D_ETHERNET = 0x1, + OVERLAY_PLUGIN_D_IP = 0x2, + OVERLAY_PLUGIN_D_PORT = 0x4, + OVERLAY_PLUGIN_D_MASK = 0x7 +} overlay_plugin_dest_t; + +typedef enum overlay_prop_type { + OVERLAY_PROP_T_INT = 0x1, /* signed int */ + OVERLAY_PROP_T_UINT, /* unsigned int */ + OVERLAY_PROP_T_IP, /* sinaddr6 */ + OVERLAY_PROP_T_STRING /* OVERLAY_PROPS_SIZEMAX */ +} overlay_prop_type_t; + +typedef enum overlay_prop_prot { + OVERLAY_PROP_PERM_REQ = 0x1, + OVERLAY_PROP_PERM_READ = 0x2, + OVERLAY_PROP_PERM_WRITE = 0x4, + OVERLAY_PROP_PERM_RW = 0x6, + OVERLAY_PROP_PERM_RRW = 0x7, + OVERLAY_PROP_PERM_MASK = 0x7 +} overlay_prop_prot_t; + +#define OVERLAY_PROP_NAMELEN 64 +#define OVERLAY_PROP_SIZEMAX 256 +#define OVERLAY_STATUS_BUFLEN 256 + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_OVERLAY_COMMON_H */ diff --git a/usr/src/uts/common/sys/overlay_impl.h b/usr/src/uts/common/sys/overlay_impl.h new file mode 100644 index 0000000000..9f3c133951 --- /dev/null +++ b/usr/src/uts/common/sys/overlay_impl.h @@ -0,0 +1,207 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015, Joyent, Inc. + */ + +#ifndef _SYS_OVERLAY_IMPL_H +#define _SYS_OVERLAY_IMPL_H + +/* + * Overlay device support + */ + +#include <sys/overlay.h> +#include <sys/overlay_common.h> +#include <sys/overlay_plugin.h> +#include <sys/overlay_target.h> +#include <sys/ksynch.h> +#include <sys/list.h> +#include <sys/avl.h> +#include <sys/ksocket.h> +#include <sys/socket.h> +#include <sys/refhash.h> +#include <sys/ethernet.h> +#include <sys/list.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define OVEP_VERSION_ONE 0x1 + +typedef struct overlay_plugin { + kmutex_t ovp_mutex; + list_node_t ovp_link; /* overlay_plugin_lock */ + uint_t ovp_active; /* ovp_mutex */ + const char *ovp_name; /* RO */ + const overlay_plugin_ops_t *ovp_ops; /* RO */ + const char *const *ovp_props; /* RO */ + uint_t ovp_nprops; /* RO */ + uint_t ovp_id_size; /* RO */ + overlay_plugin_flags_t ovp_flags; /* RO */ + overlay_plugin_dest_t ovp_dest; /* RO */ +} overlay_plugin_t; + +typedef struct overlay_mux { + list_node_t omux_lnode; + ksocket_t omux_ksock; /* RO */ + overlay_plugin_t *omux_plugin; /* RO: associated encap */ + int omux_domain; /* RO: socket domain */ + int omux_family; /* RO: socket family */ + int omux_protocol; /* RO: socket protocol */ + struct sockaddr *omux_addr; /* RO: socket address */ + socklen_t omux_alen; /* RO: sockaddr len */ + kmutex_t omux_lock; /* Protects everything below */ + uint_t omux_count; /* Active instances */ + avl_tree_t omux_devices; /* Tree of devices */ +} overlay_mux_t; + +typedef enum overlay_target_flag { + OVERLAY_T_TEARDOWN = 0x1 +} overlay_target_flag_t; + +typedef struct overlay_target { + kmutex_t ott_lock; + kcondvar_t ott_cond; + overlay_target_mode_t ott_mode; /* RO */ + overlay_plugin_dest_t ott_dest; /* RO */ + uint64_t ott_id; /* RO */ + overlay_target_flag_t ott_flags; /* ott_lock */ + uint_t ott_ocount; /* ott_lock */ + union { /* ott_lock */ + overlay_target_point_t ott_point; + struct overlay_target_dyn { + refhash_t *ott_dhash; + avl_tree_t ott_tree; + } ott_dyn; + } ott_u; +} overlay_target_t; + +typedef enum overlay_dev_flag { + OVERLAY_F_ACTIVATED = 0x01, /* Activate ioctl completed */ + OVERLAY_F_IN_MUX = 0x02, /* Currently in a mux */ + OVERLAY_F_IN_TX = 0x04, /* Currently doing tx */ + OVERLAY_F_IN_RX = 0x08, /* Currently doing rx */ + OVERLAY_F_IOMASK = 0x0c, /* A mask for rx and tx */ + OVERLAY_F_MDDROP = 0x10, /* Drop traffic for metadata update */ + OVERLAY_F_STOPMASK = 0x1e, /* None set when stopping */ + OVERLAY_F_VARPD = 0x20, /* varpd plugin exists */ + OVERLAY_F_DEGRADED = 0x40, /* device is degraded */ + OVERLAY_F_MASK = 0x7f /* mask of everything */ +} overlay_dev_flag_t; + +typedef struct overlay_dev { + kmutex_t odd_lock; + kcondvar_t odd_iowait; + list_node_t odd_link; /* overlay_dev_lock */ + mac_handle_t odd_mh; /* RO */ + overlay_plugin_t *odd_plugin; /* RO */ + datalink_id_t odd_linkid; /* RO */ + void *odd_pvoid; /* RO -- only used by plugin */ + uint_t odd_ref; /* protected by odd_lock */ + uint_t odd_mtu; /* protected by odd_lock */ + overlay_dev_flag_t odd_flags; /* protected by odd_lock */ + uint_t odd_rxcount; /* protected by odd_lock */ + uint_t odd_txcount; /* protected by odd_lock */ + overlay_mux_t *odd_mux; /* protected by odd_lock */ + uint64_t odd_vid; /* RO if active else odd_lock */ + avl_node_t odd_muxnode; /* managed by mux */ + overlay_target_t *odd_target; /* See big theory statement */ + char odd_fmamsg[OVERLAY_STATUS_BUFLEN]; /* odd_lock */ +} overlay_dev_t; + +typedef enum overlay_target_entry_flags { + OVERLAY_ENTRY_F_PENDING = 0x01, /* lookup in progress */ + OVERLAY_ENTRY_F_VALID = 0x02, /* entry is currently valid */ + OVERLAY_ENTRY_F_DROP = 0x04, /* always drop target */ + OVERLAY_ENTRY_F_VALID_MASK = 0x06 +} overlay_target_entry_flags_t; + +typedef struct overlay_target_entry { + kmutex_t ote_lock; + refhash_link_t ote_reflink; /* hashtable link */ + avl_node_t ote_avllink; /* iteration link */ + list_node_t ote_qlink; + overlay_target_entry_flags_t ote_flags; /* RW: state flags */ + uint8_t ote_addr[ETHERADDRL]; /* RO: mac addr */ + overlay_target_t *ote_ott; /* RO */ + overlay_dev_t *ote_odd; /* RO */ + overlay_target_point_t ote_dest; /* RW: destination */ + mblk_t *ote_chead; /* RW: blocked mb chain head */ + mblk_t *ote_ctail; /* RW: blocked mb chain tail */ + size_t ote_mbsize; /* RW: outstanding mblk size */ + hrtime_t ote_vtime; /* RW: valid timestamp */ +} overlay_target_entry_t; + + +#define OVERLAY_CTL "overlay" + +extern dev_info_t *overlay_dip; + +extern mblk_t *overlay_m_tx(void *, mblk_t *); + +typedef int (*overlay_dev_iter_f)(overlay_dev_t *, void *); +extern void overlay_dev_iter(overlay_dev_iter_f, void *); + +extern void overlay_plugin_init(void); +extern overlay_plugin_t *overlay_plugin_lookup(const char *); +extern void overlay_plugin_rele(overlay_plugin_t *); +extern void overlay_plugin_fini(void); +typedef int (*overlay_plugin_walk_f)(overlay_plugin_t *, void *); +extern void overlay_plugin_walk(overlay_plugin_walk_f, void *); + +extern void overlay_io_start(overlay_dev_t *, overlay_dev_flag_t); +extern void overlay_io_done(overlay_dev_t *, overlay_dev_flag_t); + +extern void overlay_link_state_update(overlay_dev_t *); + +extern void overlay_mux_init(void); +extern void overlay_mux_fini(void); + +extern overlay_mux_t *overlay_mux_open(overlay_plugin_t *, int, int, int, + struct sockaddr *, socklen_t, int *); +extern void overlay_mux_close(overlay_mux_t *); +extern void overlay_mux_add_dev(overlay_mux_t *, overlay_dev_t *); +extern void overlay_mux_remove_dev(overlay_mux_t *, overlay_dev_t *); +extern int overlay_mux_tx(overlay_mux_t *, struct msghdr *, mblk_t *); + +extern void overlay_prop_init(overlay_prop_handle_t); + +extern void overlay_target_init(void); +extern int overlay_target_busy(void); +extern int overlay_target_open(dev_t *, int, int, cred_t *); +extern int overlay_target_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); +extern int overlay_target_close(dev_t, int, int, cred_t *); +extern void overlay_target_free(overlay_dev_t *); + +#define OVERLAY_TARGET_OK 0 +#define OVERLAY_TARGET_DROP 1 +#define OVERLAY_TARGET_ASYNC 2 +extern int overlay_target_lookup(overlay_dev_t *, mblk_t *, struct sockaddr *, + socklen_t *); +extern void overlay_target_quiesce(overlay_target_t *); +extern void overlay_target_fini(void); + +extern void overlay_fm_init(void); +extern void overlay_fm_fini(void); +extern void overlay_fm_degrade(overlay_dev_t *, const char *); +extern void overlay_fm_restore(overlay_dev_t *); + +extern overlay_dev_t *overlay_hold_by_dlid(datalink_id_t); +extern void overlay_hold_rele(overlay_dev_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_OVERLAY_IMPL_H */ diff --git a/usr/src/uts/common/sys/overlay_plugin.h b/usr/src/uts/common/sys/overlay_plugin.h new file mode 100644 index 0000000000..07efaa05df --- /dev/null +++ b/usr/src/uts/common/sys/overlay_plugin.h @@ -0,0 +1,324 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015 Joyent, Inc. + */ + +#ifndef _SYS_OVERLAY_PLUGIN_H +#define _SYS_OVERLAY_PLUGIN_H + +/* + * overlay plugin interface for encapsulation/decapsulation modules + * + * This header file defines how encapsulation and decapsulation plugins + * interact within the broader system. At this time, these interfaces are + * considered private to illumos and therefore are subject to change. As we gain + * more experience with a few of the different encapsulation formats, say nvgre + * or geneve, then we can move to make this a more-stable interface. + * + * A plugin is a general kernel module that uses the miscellaneous mod-linkage. + * + * In it's _init(9E) routine, it must register itself with the overlay + * subsystem. To do this, it allocates an overlay_plugin_register_t via + * overlay_plugin_alloc(), that it then * fills out with various required + * information and then attempts to register with the system via a call to + * overlay_plugin_register(). If that succeeds, it should then call + * mod_install(9F). If the mod_install(9F) fails, then it should call + * overlay_plugin_unregister(). Regardless of success or failure, it should call + * overlay_plugin_free() to ensure that any memory that may be associated with + * the registration is freed. + * + * When the module's _fini(9E) is called, overlay_plugin_unregister() should be + * called first. It may return an error, such as EBUSY. In such cases, it should + * be returned as the return status of _fini(9E). This is quite necessary, it + * ensures that if the module is in use it doesn't get unloaded out from under + * us the broader subsystem while it's still in use. A driver can use that to + * know that there are no current instances of its private data. + * + * ------------------ + * Plugin Definitions + * ------------------ + * + * A plugin is required to fill in both an operations vector and a series of + * information to the callback routine. Here are the routines and their + * purposes. The full signatures are available below. + * + * overlay_plugin_init_t + * + * This interface is used to create a new instance of a plugin. An instance + * of a plugin will be created for each overlay device that is created. For + * example, if a device is created with VXLAN ID 23 and ID 42, then there + * will be two different calls to this function. + * + * This function gives the plugin a chance to create a private data + * structure that will be returned on subsequent calls to the system. + * + * overlay_plugin_fini_t + * + * This is the opposite of overlay_plugin_init_t. It will be called when it + * is safe to remove any private data that is associated with this instance + * of the plugin. + * + * overlay_plugin_propinfo_t + * + * This is called with the name of a property that is registered when the + * plugin is created. This function will be called with the name of the + * property that information is being requested about. The plugin is + * responsible for filling out information such as setting the name, the + * type of property it is, the protection of the property (can a user + * update it?), whether the property is required, an optional default value + * for the property, and an optional set of values or ranges that are + * allowed. + * + * overlay_plugin_getprop_t + * + * Return the value of the named property from the current instance of the + * plugin. + * + * overlay_plugin_setprop_t + * + * Set the value of the named property to the specified value for the + * current instance of the plugin. Note, that it is the plugin's + * responsibility to ensure that the value of the property is valid and to + * update state as appropriate. + * + * overlay_plugin_socket_t + * + * Every overlay device has a corresponding socket that it uses to send and + * receive traffic. This routine is used to get the parameters that should + * be used to define such a socket. The actual socket may be multiplexed + * with other uses of it. + * + * overlay_plugin_sockopt_t + * + * Allow a plugin to set any necessary socket options that it needs on the + * kernel socket that is being used by a mux. This will only be called once + * for a given mux, if additional devices are added to a mux, it will not + * be called additional times. + * + * overlay_plugin_encap_t + * + * In this routine you're given a message block and information about the + * packet, such as the identifier and are asked to fill out a message block + * that represents the encapsulation header and optionally manipulate the + * input message if required. + * + * overlay_plugin_decap_t + * + * In this routine, you're given the encapsulated message block. The + * requirement is to decapsulate it and determine what is the correct + * overlay identifier for this network and to fill in the header size so + * the broader system knows how much of this data should be considered + * consumed. + * + * ovpo_callbacks + * + * This should be set to zero, it's reserved for future use. + * + * Once these properties are defined, the module should define the following + * members in the overlay_plugin_register_t. + * + * ovep_version + * + * Should be set to the value of the macro OVEP_VERSION. + * + * ovep_name + * + * Should be set to a character string that has the name of the module. + * Generally this should match the name of the kernel module; however, this + * is the name that users will use to refer to this module when creating + * devices. + * + * overlay_plugin_ops_t + * + * Should be set to the functions as described above. + * + * ovep_props + * + * This is an array of character strings that holds the names of the + * properties of the encapsulation plugin. + * + * + * ovep_id_size + * + * This is the size in bytes of the valid range for the identifier. The + * valid identifier range is considered a ovep_id_size byte unsigned + * integer, [ 0, 1 << (ovep_id_size * 8) ). + * + * ovep_flags + * + * A series of flags that indicate optional features that are supported. + * Valid flags include: + * + * OVEP_F_VLAN_TAG + * + * The encapsulation format allows for the encapsulated + * packet to maintain a VLAN tag. + * + * ovep_dest + * + * Describes the kind of destination that the overlay plugin supports for + * sending traffic. For example, vxlan uses UDP, therefore it requires both + * an IP address and a port; however, nvgre uses the gre header and + * therefore only requires an IP address. The following flags may be + * combined: + * + * OVERLAY_PLUGIN_D_ETHERNET + * + * Indicates that to send a packet to its destination, we + * require a link-layer ethernet address. + * + * OVERLAY_PLUGIN_D_IP + * + * Indicates that to send a packet to its destination, we + * require an IP address. Note, all IP addresses are + * transmitted as IPv6 addresses and for an IPv4 + * destination, using an IPv4-mapped IPv6 address is the + * expected way to transmit that. + * + * OVERLAY_PLUGIN_D_PORT + * + * Indicates that to send a packet to its destination, a + * port is required, this usually indicates that the + * protocol uses something like TCP or UDP. + * + * + * ------------------------------------------------- + * Downcalls, Upcalls, and Synchronization Guarantees + * ------------------------------------------------- + * + * Every instance of a given module is independent. The kernel only guarantees + * that it will probably perform downcalls into different instances in parallel + * at some point. No locking is provided by the framework for synchronization + * across instances. If a module finds itself needing that, it will be up to it + * to provide it. + * + * In a given instance, the kernel may call into entry points in parallel. If + * the instance has private data, it should likely synchronize it. The one + * guarantee that we do make, is that calls to getprop and setprop will be done + * synchronized by a caller holding the MAC perimeter. + * + * While servicing a downcall from the general overlay device framework, a + * kernel module should not make any upcalls, excepting those functions that are + * defined in this header file, eg. the property related callbacks. Improtantly, + * it cannot make any assumptions about what locks may or may not be held by the + * broader system. The only thing that it is safe for it to use are its own + * locks. + * + * ---------------- + * Downcall Context + * ---------------- + * + * For all of the downcalls, excepting the overlay_plugin_encap_t and + * overlay_plugin_decap_t, the calls will be made either in kernel or user + * context, the module should not assume either way. + * + * overlay_plugin_encap_t and overlay_plugin_decap_t may be called in user, + * kernel or interrupt context; however, it is guaranteed that the interrupt + * will be below LOCK_LEVEL, and therefore it is safe to grab locks. + */ + +#include <sys/stream.h> +#include <sys/mac_provider.h> +#include <sys/ksocket.h> +#include <sys/overlay_common.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define OVEP_VERSION 0x1 + +typedef enum overlay_plugin_flags { + OVEP_F_VLAN_TAG = 0x01 /* Supports VLAN Tags */ +} overlay_plugin_flags_t; + +/* + * The ID space could easily be more than a 64-bit number, even + * though today it's either a 24-64 bit value. How should we future + * proof ourselves here? + */ +typedef struct ovep_encap_info { + uint64_t ovdi_id; + size_t ovdi_hdr_size; +} ovep_encap_info_t; + +typedef struct __overlay_prop_handle *overlay_prop_handle_t; +typedef struct __overlay_handle *overlay_handle_t; + +/* + * Plugins are guaranteed that calls to setprop are serialized. However, any + * number of other calls can be going on in parallel otherwise. + */ +typedef int (*overlay_plugin_encap_t)(void *, mblk_t *, + ovep_encap_info_t *, mblk_t **); +typedef int (*overlay_plugin_decap_t)(void *, mblk_t *, + ovep_encap_info_t *); +typedef int (*overlay_plugin_init_t)(overlay_handle_t, void **); +typedef void (*overlay_plugin_fini_t)(void *); +typedef int (*overlay_plugin_socket_t)(void *, int *, int *, int *, + struct sockaddr *, socklen_t *); +typedef int (*overlay_plugin_sockopt_t)(ksocket_t); +typedef int (*overlay_plugin_getprop_t)(void *, const char *, void *, + uint32_t *); +typedef int (*overlay_plugin_setprop_t)(void *, const char *, const void *, + uint32_t); +typedef int (*overlay_plugin_propinfo_t)(const char *, overlay_prop_handle_t); + +typedef struct overlay_plugin_ops { + uint_t ovpo_callbacks; + overlay_plugin_init_t ovpo_init; + overlay_plugin_fini_t ovpo_fini; + overlay_plugin_encap_t ovpo_encap; + overlay_plugin_decap_t ovpo_decap; + overlay_plugin_socket_t ovpo_socket; + overlay_plugin_sockopt_t ovpo_sockopt; + overlay_plugin_getprop_t ovpo_getprop; + overlay_plugin_setprop_t ovpo_setprop; + overlay_plugin_propinfo_t ovpo_propinfo; +} overlay_plugin_ops_t; + +typedef struct overlay_plugin_register { + uint_t ovep_version; + const char *ovep_name; + const overlay_plugin_ops_t *ovep_ops; + const char **ovep_props; + uint_t ovep_id_size; + uint_t ovep_flags; + uint_t ovep_dest; +} overlay_plugin_register_t; + +/* + * Functions that interact with registration + */ +extern overlay_plugin_register_t *overlay_plugin_alloc(uint_t); +extern void overlay_plugin_free(overlay_plugin_register_t *); +extern int overlay_plugin_register(overlay_plugin_register_t *); +extern int overlay_plugin_unregister(const char *); + +/* + * Property information callbacks + */ +extern void overlay_prop_set_name(overlay_prop_handle_t, const char *); +extern void overlay_prop_set_prot(overlay_prop_handle_t, overlay_prop_prot_t); +extern void overlay_prop_set_type(overlay_prop_handle_t, overlay_prop_type_t); +extern int overlay_prop_set_default(overlay_prop_handle_t, void *, ssize_t); +extern void overlay_prop_set_nodefault(overlay_prop_handle_t); +extern void overlay_prop_set_range_uint32(overlay_prop_handle_t, uint32_t, + uint32_t); +extern void overlay_prop_set_range_str(overlay_prop_handle_t, const char *); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_OVERLAY_PLUGIN_H */ diff --git a/usr/src/uts/common/sys/overlay_target.h b/usr/src/uts/common/sys/overlay_target.h new file mode 100644 index 0000000000..cae193c334 --- /dev/null +++ b/usr/src/uts/common/sys/overlay_target.h @@ -0,0 +1,292 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2015 Joyent, Inc. + */ + +#ifndef _OVERLAY_TARGET_H +#define _OVERLAY_TARGET_H + +/* + * Overlay device varpd ioctl interface (/dev/overlay) + */ + +#include <sys/types.h> +#include <sys/ethernet.h> +#include <netinet/in.h> +#include <sys/overlay_common.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct overlay_target_point { + uint8_t otp_mac[ETHERADDRL]; + struct in6_addr otp_ip; + uint16_t otp_port; +} overlay_target_point_t; + +#define OVERLAY_TARG_IOCTL (('o' << 24) | ('v' << 16) | ('t' << 8)) + +#define OVERLAY_TARG_INFO (OVERLAY_TARG_IOCTL | 0x01) + +typedef enum overlay_targ_info_flags { + OVERLAY_TARG_INFO_F_ACTIVE = 0x01, + OVERLAY_TARG_INFO_F_DEGRADED = 0x02 +} overlay_targ_info_flags_t; + +/* + * Get target information about an overlay device + */ +typedef struct overlay_targ_info { + datalink_id_t oti_linkid; + uint32_t oti_needs; + uint64_t oti_flags; + uint64_t oti_vnetid; +} overlay_targ_info_t; + +/* + * Declare an association between a given varpd instance and a datalink. + */ +#define OVERLAY_TARG_ASSOCIATE (OVERLAY_TARG_IOCTL | 0x02) + +typedef struct overlay_targ_associate { + datalink_id_t ota_linkid; + uint32_t ota_mode; + uint64_t ota_id; + uint32_t ota_provides; + overlay_target_point_t ota_point; +} overlay_targ_associate_t; + +/* + * Remove an association from a device. If the device has already been started, + * this implies OVERLAY_TARG_DEGRADE. + */ +#define OVERLAY_TARG_DISASSOCIATE (OVERLAY_TARG_IOCTL | 0x3) + +/* + * Tells the kernel that while a varpd instance still exists, it basically isn't + * making any forward progress, so the device should consider itself degraded. + */ +#define OVERLAY_TARG_DEGRADE (OVERLAY_TARG_IOCTL | 0x4) + +typedef struct overlay_targ_degrade { + datalink_id_t otd_linkid; + uint32_t otd_pad; + char otd_buf[OVERLAY_STATUS_BUFLEN]; +} overlay_targ_degrade_t; + +/* + * Tells the kernel to remove the degraded status that it set on a device. + */ +#define OVERLAY_TARG_RESTORE (OVERLAY_TARG_IOCTL | 0x5) + +typedef struct overlay_targ_id { + datalink_id_t otid_linkid; +} overlay_targ_id_t; + +/* + * The following ioctls are all used to support dynamic lookups from userland, + * generally serviced by varpd. + * + * The way this is designed to work is that user land will have threads sitting + * in OVERLAY_TARG_LOOKUP ioctls waiting to service requests. A thread will sit + * waiting for work for up to approximately one second of time before they will + * be sent back out to user land to give user land a chance to clean itself up + * or more generally, come back into the kernel for work. Once these threads + * return, they will have a request with which more action can be done. The + * following ioctls can all be used to answer the request. + * + * OVERLAY_TARG_RESPOND - overlay_targ_resp_t + * + * The overlay_targ_resp_t has the appropriate information from + * which a reply can be generated. The information is filled into + * an overlay_targ_point_t as appropriate based on the + * overlay_plugin_dest_t type. + * + * + * OVERLAY_TARG_DROP - overlay_targ_resp_t + * + * The overlay_targ_resp_t should identify a request for which to + * drop a packet. + * + * + * OVERLAY_TARG_INJECT - overlay_targ_pkt_t + * + * The overlay_targ_pkt_t injects a fully formed packet into the + * virtual network. It may either be identified by its data link id + * or by the request id. If both are specified, the + * datalink id will be used. Note, that an injection is not + * considered a reply and if this corresponds to a requeset, then + * that individual packet must still be dropped. + * + * + * OVERLAY_TARG_PKT - overlay_targ_pkt_t + * + * This ioctl can be used to copy data from a given request into a + * user buffer. This can be used in combination with + * OVERLAY_TARG_INJECT to implemnt services such as a proxy-arp. + * + * + * OVERLAY_TARG_RESEND - overlay_targ_pkt_t + * + * This ioctl is similar to the OVERLAY_TARG_INJECT, except instead + * of receiving it on the local mac handle, it queues it for + * retransmission again. This is useful if you have a packet that + * was originally destined for some broadcast or multicast address + * that you now want to send to a unicast address. + */ +#define OVERLAY_TARG_LOOKUP (OVERLAY_TARG_IOCTL | 0x10) +#define OVERLAY_TARG_RESPOND (OVERLAY_TARG_IOCTL | 0x11) +#define OVERLAY_TARG_DROP (OVERLAY_TARG_IOCTL | 0x12) +#define OVERLAY_TARG_INJECT (OVERLAY_TARG_IOCTL | 0x13) +#define OVERLAY_TARG_PKT (OVERLAY_TARG_IOCTL | 0x14) +#define OVERLAY_TARG_RESEND (OVERLAY_TARG_IOCTL | 0x15) + +typedef struct overlay_targ_lookup { + uint64_t otl_dlid; + uint64_t otl_reqid; + uint64_t otl_varpdid; + uint64_t otl_vnetid; + uint64_t otl_hdrsize; + uint64_t otl_pktsize; + uint8_t otl_srcaddr[ETHERADDRL]; + uint8_t otl_dstaddr[ETHERADDRL]; + uint32_t otl_dsttype; + uint32_t otl_sap; + int32_t otl_vlan; +} overlay_targ_lookup_t; + +typedef struct overlay_targ_resp { + uint64_t otr_reqid; + overlay_target_point_t otr_answer; +} overlay_targ_resp_t; + +typedef struct overlay_targ_pkt { + uint64_t otp_linkid; + uint64_t otp_reqid; + uint64_t otp_size; + void *otp_buf; +} overlay_targ_pkt_t; + +#ifdef _KERNEL + +typedef struct overlay_targ_pkt32 { + uint64_t otp_linkid; + uint64_t otp_reqid; + uint64_t otp_size; + caddr32_t otp_buf; +} overlay_targ_pkt32_t; + +#endif /* _KERNEL */ + +/* + * This provides a way to get a list of active overlay devices independently + * from dlmgmtd. At the end of the day the kernel always knows what will exist + * and this allows varpd which is an implementation of libdladm not to end up + * needing to call back into dlmgmtd via libdladm and create an unfortunate + * dependency cycle. + */ + +#define OVERLAY_TARG_LIST (OVERLAY_TARG_IOCTL | 0x20) + +typedef struct overlay_targ_list { + uint32_t otl_nents; + uint32_t otl_ents[]; +} overlay_targ_list_t; + +/* + * The following family of ioctls all manipulate the target cache of a given + * device. + * + * OVERLAY_TARG_CACHE_GET - overlay_targ_cache_t + * + * The overlay_targ_cache_t should be have its link identifier and + * the desired mac address filled in. On return, it will fill in + * the otc_dest member, if the entry exists in the table. + * + * + * OVERLAY_TARG_CACHE_SET - overlay_targ_cache_t + * + * The cache table entry of the mac address referred to by otc_mac + * and otd_linkid will be filled in with the details provided by in + * the otc_dest member. + * + * OVERLAY_TARG_CACHE_REMOVE - overlay_targ_cache_t + * + * Removes the cache entry identified by otc_mac from the table. + * Note that this does not stop any in-flight lookups or deal with + * any data that is awaiting a lookup. + * + * + * OVERLAY_TARG_CACHE_FLUSH - overlay_targ_cache_t + * + * Similar to OVERLAY_TARG_CACHE_REMOVE, but functions on the + * entire table identified by otc_linkid. All other parameters are + * ignored. + * + * + * OVERLAY_TARG_CACHE_ITER - overlay_targ_cache_iter_t + * + * Iterates over the contents of a target cache identified by + * otci_linkid. Iteration is guaranteed to be exactly once for + * items which are in the hashtable at the beginning and end of + * iteration. For items which are added or removed after iteration + * has begun, only at most once semantics are guaranteed. Consumers + * should ensure that otci_marker is zeroed before starting + * iteration and should preserve its contents across calls. + * + * Before calling in, otci_count should be set to the number of + * entries that space has been allocated for in otci_ents. The + * value will be updated to indicate the total number written out. + */ + +#define OVERLAY_TARG_CACHE_GET (OVERLAY_TARG_IOCTL | 0x30) +#define OVERLAY_TARG_CACHE_SET (OVERLAY_TARG_IOCTL | 0x31) +#define OVERLAY_TARG_CACHE_REMOVE (OVERLAY_TARG_IOCTL | 0x32) +#define OVERLAY_TARG_CACHE_FLUSH (OVERLAY_TARG_IOCTL | 0x33) +#define OVERLAY_TARG_CACHE_ITER (OVERLAY_TARG_IOCTL | 0x34) + +/* + * This is a pretty arbitrary number that we're constraining ourselves to + * for iteration. Basically the goal is to make sure that we can't have a user + * ask us to allocate too much memory on their behalf at any time. A more + * dynamic form may be necessary some day. + */ +#define OVERLAY_TARGET_ITER_MAX 500 + +#define OVERLAY_TARGET_CACHE_DROP 0x01 + +typedef struct overlay_targ_cache_entry { + uint8_t otce_mac[ETHERADDRL]; + uint16_t otce_flags; + overlay_target_point_t otce_dest; +} overlay_targ_cache_entry_t; + +typedef struct overlay_targ_cache { + datalink_id_t otc_linkid; + overlay_targ_cache_entry_t otc_entry; +} overlay_targ_cache_t; + +typedef struct overlay_targ_cache_iter { + datalink_id_t otci_linkid; + uint32_t otci_pad; + uint64_t otci_marker; + uint16_t otci_count; + overlay_targ_cache_entry_t otci_ents[]; +} overlay_targ_cache_iter_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _OVERLAY_TARGET_H */ diff --git a/usr/src/uts/common/sys/param.h b/usr/src/uts/common/sys/param.h index 01c25610dd..12bdc29079 100644 --- a/usr/src/uts/common/sys/param.h +++ b/usr/src/uts/common/sys/param.h @@ -103,7 +103,7 @@ extern "C" { #define DEFAULT_MAXPID 999999 #define DEFAULT_JUMPPID 100000 #else -#define DEFAULT_MAXPID 30000 +#define DEFAULT_MAXPID 99999 #define DEFAULT_JUMPPID 0 #endif diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h index 5f5b66d437..5328d02c59 100644 --- a/usr/src/uts/common/sys/policy.h +++ b/usr/src/uts/common/sys/policy.h @@ -108,6 +108,7 @@ int secpolicy_ipc_owner(const cred_t *, const struct kipc_perm *); int secpolicy_kmdb(const cred_t *); int secpolicy_lock_memory(const cred_t *); int secpolicy_meminfo(const cred_t *); +int secpolicy_fs_import(const cred_t *); int secpolicy_modctl(const cred_t *, int); int secpolicy_net(const cred_t *, int, boolean_t); int secpolicy_net_bindmlp(const cred_t *); @@ -174,6 +175,7 @@ int secpolicy_setid_setsticky_clear(vnode_t *, vattr_t *, const vattr_t *, cred_t *); int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t); int secpolicy_xvm_control(const cred_t *); +int secpolicy_hyprlofs_control(const cred_t *); int secpolicy_basic_exec(const cred_t *, vnode_t *); int secpolicy_basic_fork(const cred_t *); diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h index f1a2fc5485..ff4a1abce4 100644 --- a/usr/src/uts/common/sys/proc.h +++ b/usr/src/uts/common/sys/proc.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -47,6 +48,7 @@ #include <sys/list.h> #include <sys/avl.h> #include <sys/door_impl.h> +#include <sys/signalfd.h> #ifdef __cplusplus extern "C" { @@ -197,6 +199,7 @@ typedef struct proc { k_sigset_t p_extsig; /* signals sent from another contract */ k_sigset_t p_ignore; /* ignore when generated */ k_sigset_t p_siginfo; /* gets signal info with signal */ + void *p_sigfd; /* signalfd support state */ struct sigqueue *p_sigqueue; /* queued siginfo structures */ struct sigqhdr *p_sigqhdr; /* hdr to sigqueue structure pool */ struct sigqhdr *p_signhdr; /* hdr to signotify structure pool */ @@ -346,7 +349,9 @@ typedef struct proc { struct zone *p_zone; /* zone in which process lives */ struct vnode *p_execdir; /* directory that p_exec came from */ struct brand *p_brand; /* process's brand */ - void *p_brand_data; /* per-process brand state */ + + /* per-process brand state */ + void *p_brand_data; /* additional lock to protect p_sessp (but not its contents) */ kmutex_t p_splock; @@ -361,7 +366,6 @@ typedef struct proc { */ struct user p_user; /* (see sys/user.h) */ } proc_t; - #define PROC_T /* headers relying on proc_t are OK */ #ifdef _KERNEL @@ -627,6 +631,7 @@ extern int signal_is_blocked(kthread_t *, int); extern int sigcheck(proc_t *, kthread_t *); extern void sigdefault(proc_t *); +extern struct pid *pid_find(pid_t pid); extern void pid_setmin(void); extern pid_t pid_allocate(proc_t *, pid_t, int); extern int pid_rele(struct pid *); @@ -642,6 +647,7 @@ extern int sprtrylock_proc(proc_t *); extern void sprwaitlock_proc(proc_t *); extern void sprlock_proc(proc_t *); extern void sprunlock(proc_t *); +extern void sprunprlock(proc_t *); extern void pid_init(void); extern proc_t *pid_entry(int); extern int pid_slot(proc_t *); @@ -716,6 +722,10 @@ extern kthread_t *thread_unpin(void); extern void thread_init(void); extern void thread_load(kthread_t *, void (*)(), caddr_t, size_t); +extern void thread_splitstack(void (*)(void *), void *, size_t); +extern void thread_splitstack_run(caddr_t, void (*)(void *), void *); +extern void thread_splitstack_cleanup(void); + extern void tsd_create(uint_t *, void (*)(void *)); extern void tsd_destroy(uint_t *); extern void *tsd_getcreate(uint_t *, void (*)(void *), void *(*)(void)); @@ -757,7 +767,7 @@ extern void pokelwps(proc_t *); extern void continuelwps(proc_t *); extern int exitlwps(int); extern void lwp_ctmpl_copy(klwp_t *, klwp_t *); -extern void lwp_ctmpl_clear(klwp_t *); +extern void lwp_ctmpl_clear(klwp_t *, boolean_t); extern klwp_t *forklwp(klwp_t *, proc_t *, id_t); extern void lwp_load(klwp_t *, gregset_t, uintptr_t); extern void lwp_setrval(klwp_t *, int, int); diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h index f592fd9dcf..501af712ef 100644 --- a/usr/src/uts/common/sys/procfs.h +++ b/usr/src/uts/common/sys/procfs.h @@ -25,6 +25,7 @@ */ /* * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. */ #ifndef _SYS_PROCFS_H @@ -233,6 +234,7 @@ typedef struct pstatus { #define PR_FAULTED 6 #define PR_SUSPENDED 7 #define PR_CHECKPOINT 8 +#define PR_BRAND 9 /* * lwp ps(1) information file. /proc/<pid>/lwp/<lwpid>/lwpsinfo diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h index 6c79ee266d..ba8b2b1210 100644 --- a/usr/src/uts/common/sys/ptms.h +++ b/usr/src/uts/common/sys/ptms.h @@ -126,6 +126,12 @@ extern void ptms_logp(char *, uintptr_t); #define DDBGP(a, b) #endif +typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t; +typedef struct ptmptsopencb { + boolean_t (*ppocb_func)(ptmptsopencb_arg_t); + ptmptsopencb_arg_t ppocb_arg; +} ptmptsopencb_t; + #endif /* _KERNEL */ typedef struct pt_own { @@ -157,6 +163,19 @@ typedef struct pt_own { #define ZONEPT (('P'<<8)|4) /* set zone of master/slave pair */ #define OWNERPT (('P'<<8)|5) /* set owner/group for slave device */ +#ifdef _KERNEL +/* + * kernel ioctl commands + * + * PTMPTSOPENCB: Returns a callback function pointer and opaque argument. + * The return value of the callback function when it's invoked + * with the opaque argument passed to it will indicate if the + * pts slave device is currently open. + */ +#define PTMPTSOPENCB (('P'<<8)|6) /* check if the slave is open */ + +#endif /* _KERNEL */ + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_hash.h b/usr/src/uts/common/sys/refhash.h index 2069e6d3f1..b7427a454d 100644 --- a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_hash.h +++ b/usr/src/uts/common/sys/refhash.h @@ -10,11 +10,11 @@ */ /* - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ -#ifndef _SYS_SCSI_ADAPTERS_MPTHASH_H -#define _SYS_SCSI_ADAPTERS_MPTHASH_H +#ifndef _SYS_REFHASH_H +#define _SYS_REFHASH_H #include <sys/types.h> #include <sys/list.h> @@ -58,4 +58,4 @@ extern void *refhash_first(refhash_t *); extern void *refhash_next(refhash_t *, void *); extern boolean_t refhash_obj_valid(refhash_t *hp, const void *); -#endif /* _SYS_SCSI_ADAPTERS_MPTHASH_H */ +#endif /* _SYS_REFHASH_H */ diff --git a/usr/src/uts/common/sys/resource.h b/usr/src/uts/common/sys/resource.h index 2d3800b946..4b70a77db8 100644 --- a/usr/src/uts/common/sys/resource.h +++ b/usr/src/uts/common/sys/resource.h @@ -23,6 +23,7 @@ * * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2014 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -191,6 +192,7 @@ struct rusage { #define _RUSAGESYS_GETRUSAGE_CHLD 1 /* rusage child process */ #define _RUSAGESYS_GETRUSAGE_LWP 2 /* rusage lwp */ #define _RUSAGESYS_GETVMUSAGE 3 /* getvmusage */ +#define _RUSAGESYS_INVALMAP 4 /* vm_map_inval */ #if defined(_SYSCALL32) diff --git a/usr/src/uts/common/sys/rt.h b/usr/src/uts/common/sys/rt.h index ca52f8d995..82cc08d326 100644 --- a/usr/src/uts/common/sys/rt.h +++ b/usr/src/uts/common/sys/rt.h @@ -22,6 +22,7 @@ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -31,8 +32,6 @@ #ifndef _SYS_RT_H #define _SYS_RT_H -#pragma ident "%Z%%M% %I% %E% SMI" /* SVr4.0 1.4 */ - #include <sys/types.h> #include <sys/thread.h> @@ -77,6 +76,16 @@ typedef struct rtkparms { int rt_tqsig; /* real-time time quantum signal */ uint_t rt_cflags; /* real-time control flags */ } rtkparms_t; + +#define RTGPPRIO0 100 /* Global priority for RT priority 0 */ + +/* + * control flags (kparms->rt_cflags). + */ +#define RT_DOPRI 0x01 /* change priority */ +#define RT_DOTQ 0x02 /* change RT time quantum */ +#define RT_DOSIG 0x04 /* change RT time quantum signal */ + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h index 3983188fce..02116b45c4 100644 --- a/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h +++ b/usr/src/uts/common/sys/scsi/adapters/mpt_sas/mptsas_var.h @@ -22,7 +22,7 @@ /* * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright 2015, Joyent, Inc. * Copyright (c) 2014, Tegile Systems Inc. All rights reserved. */ @@ -58,10 +58,10 @@ #include <sys/byteorder.h> #include <sys/queue.h> +#include <sys/refhash.h> #include <sys/isa_defs.h> #include <sys/sunmdi.h> #include <sys/mdi_impldefs.h> -#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h> #include <sys/scsi/adapters/mpt_sas/mptsas_ioctl.h> #include <sys/scsi/adapters/mpt_sas/mpi/mpi2_tool.h> #include <sys/scsi/adapters/mpt_sas/mpi/mpi2_cnfg.h> diff --git a/usr/src/uts/common/sys/signal.h b/usr/src/uts/common/sys/signal.h index 8f0e1794f4..139784d578 100644 --- a/usr/src/uts/common/sys/signal.h +++ b/usr/src/uts/common/sys/signal.h @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015, Joyent, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -158,8 +159,8 @@ struct sigaction32 { * use of these symbols by applications is injurious * to binary compatibility */ -#define NSIG 74 /* valid signals range from 1 to NSIG-1 */ -#define MAXSIG 73 /* size of u_signal[], NSIG-1 <= MAXSIG */ +#define NSIG 75 /* valid signals range from 1 to NSIG-1 */ +#define MAXSIG 74 /* size of u_signal[], NSIG-1 <= MAXSIG */ #endif /* defined(__EXTENSIONS__) || !defined(_XPG4_2) */ #define MINSIGSTKSZ 2048 diff --git a/usr/src/uts/common/sys/signalfd.h b/usr/src/uts/common/sys/signalfd.h new file mode 100644 index 0000000000..2661d5a05f --- /dev/null +++ b/usr/src/uts/common/sys/signalfd.h @@ -0,0 +1,100 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +/* + * Header file to support the signalfd facility. Note that this facility + * is designed to be binary compatible with the Linux signalfd facility, modulo + * the signals themselves; values for constants here should therefore exactly + * match those found in Linux, and this facility shouldn't be extended + * independently of Linux. + */ + +#ifndef _SYS_SIGNALFD_H +#define _SYS_SIGNALFD_H + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * To assure binary compatibility with Linux, these values are fixed at their + * Linux equivalents, not their native ones. + */ +#define SFD_CLOEXEC 02000000 /* LX_O_CLOEXEC */ +#define SFD_NONBLOCK 04000 /* LX_O_NONBLOCK */ + +/* + * These ioctl values are specific to the native implementation; applications + * shouldn't be using them directly, and they should therefore be safe to + * change without breaking apps. + */ +#define SIGNALFDIOC (('s' << 24) | ('f' << 16) | ('d' << 8)) +#define SIGNALFDIOC_MASK (SIGNALFDIOC | 1) /* set mask */ + +typedef struct signalfd_siginfo { + uint32_t ssi_signo; /* signal from signal.h */ + int32_t ssi_errno; /* error from errno.h */ + int32_t ssi_code; /* signal code */ + uint32_t ssi_pid; /* PID of sender */ + uint32_t ssi_uid; /* real UID of sender */ + int32_t ssi_fd; /* File descriptor (SIGIO) */ + uint32_t ssi_tid; /* unused */ + uint32_t ssi_band; /* band event (SIGIO) */ + uint32_t ssi_overrun; /* unused */ + uint32_t ssi_trapno; /* trap number that caused signal */ + int32_t ssi_status; /* exit status or signal (SIGCHLD) */ + int32_t ssi_int; /* unused */ + uint64_t ssi_ptr; /* unused */ + uint64_t ssi_utime; /* user CPU time consumed (SIGCHLD) */ + uint64_t ssi_stime; /* system CPU time consumed (SIGCHLD) */ + uint64_t ssi_addr; /* address that generated signal */ + uint8_t ssi_pad[48]; /* Pad size to 128 bytes to allow for */ + /* additional fields in the future. */ +} signalfd_siginfo_t; + +#ifndef _KERNEL + +extern int signalfd(int, const sigset_t *, int); + +#else + +#define SIGNALFDMNRN_SIGNALFD 0 +#define SIGNALFDMNRN_CLONE 1 + +typedef struct sigfd_wake_list { + list_node_t sigfd_wl_lst; + void *sigfd_wl_state; +} sigfd_wake_list_t; + +/* + * This holds the proc_t state for a process which is using signalfd. + */ +typedef struct sigfd_proc_state { + void (*sigfd_pollwake_cb)(void *, int); + list_t sigfd_list; +} sigfd_proc_state_t; + + +extern void (*sigfd_exit_helper)(); + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SIGNALFD_H */ diff --git a/usr/src/uts/common/sys/socket.h b/usr/src/uts/common/sys/socket.h index da8e3ab351..53763cd0c8 100644 --- a/usr/src/uts/common/sys/socket.h +++ b/usr/src/uts/common/sys/socket.h @@ -22,6 +22,7 @@ * Copyright 2014 Garrett D'Amore <garrett@damore.org> * * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015, Joyent, Inc. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -39,6 +40,9 @@ /* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */ +/* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ #ifndef _SYS_SOCKET_H #define _SYS_SOCKET_H @@ -293,8 +297,9 @@ struct linger { #define AF_INET_OFFLOAD 30 /* Sun private; do not use */ #define AF_TRILL 31 /* TRILL interface */ #define AF_PACKET 32 /* PF_PACKET Linux socket interface */ +#define AF_LX_NETLINK 33 /* Linux-compatible netlink */ -#define AF_MAX 32 +#define AF_MAX 33 /* * Protocol families, same as address families for now. @@ -334,6 +339,7 @@ struct linger { #define PF_INET_OFFLOAD AF_INET_OFFLOAD /* Sun private; do not use */ #define PF_TRILL AF_TRILL #define PF_PACKET AF_PACKET +#define PF_LX_NETLINK AF_LX_NETLINK #define PF_MAX AF_MAX @@ -420,6 +426,7 @@ struct msghdr32 { #define MSG_NOTIFICATION 0x100 /* Notification, not data */ #define MSG_XPG4_2 0x8000 /* Private: XPG4.2 flag */ +/* Obsolete but kept for compilation compatability. Use IOV_MAX. */ #define MSG_MAXIOVLEN 16 #ifdef _KERNEL diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h index 52fa3a5822..da61975904 100644 --- a/usr/src/uts/common/sys/socketvar.h +++ b/usr/src/uts/common/sys/socketvar.h @@ -21,6 +21,7 @@ /* * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015 Joyent, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -102,6 +103,7 @@ struct sockaddr_ux { typedef struct sonodeops sonodeops_t; typedef struct sonode sonode_t; +typedef boolean_t (*so_krecv_f)(sonode_t *, mblk_t *, size_t, int, void *); struct sodirect_s; @@ -244,6 +246,10 @@ struct sonode { struct sof_instance *so_filter_top; /* top of stack */ struct sof_instance *so_filter_bottom; /* bottom of stack */ clock_t so_filter_defertime; /* time when deferred */ + + /* Kernel direct receive callbacks */ + so_krecv_f so_krecv_cb; /* recv callback */ + void *so_krecv_arg; /* recv cb arg */ }; #define SO_HAVE_DATA(so) \ @@ -297,15 +303,16 @@ struct sonode { #define SS_OOBPEND 0x00002000 /* OOB pending or present - poll */ #define SS_HAVEOOBDATA 0x00004000 /* OOB data present */ #define SS_HADOOBDATA 0x00008000 /* OOB data consumed */ -#define SS_CLOSING 0x00010000 /* in process of closing */ +#define SS_CLOSING 0x00010000 /* in process of closing */ #define SS_FIL_DEFER 0x00020000 /* filter deferred notification */ #define SS_FILOP_OK 0x00040000 /* socket can attach filters */ #define SS_FIL_RCV_FLOWCTRL 0x00080000 /* filter asserted rcv flow ctrl */ + #define SS_FIL_SND_FLOWCTRL 0x00100000 /* filter asserted snd flow ctrl */ #define SS_FIL_STOP 0x00200000 /* no more filter actions */ - #define SS_SODIRECT 0x00400000 /* transport supports sodirect */ +#define SS_FILOP_UNSF 0x00800000 /* block attaching unsafe filters */ #define SS_SENTLASTREADSIG 0x01000000 /* last rx signal has been sent */ #define SS_SENTLASTWRITESIG 0x02000000 /* last tx signal has been sent */ @@ -321,7 +328,8 @@ struct sonode { /* * Sockets that can fall back to TPI must ensure that fall back is not - * initiated while a thread is using a socket. + * initiated while a thread is using a socket. Otherwise this disables all + * future filter attachment. */ #define SO_BLOCK_FALLBACK(so, fn) \ ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \ @@ -337,6 +345,24 @@ struct sonode { } \ } +/* + * Sockets that can fall back to TPI must ensure that fall back is not + * initiated while a thread is using a socket. Otherwise this disables all + * future unsafe filter attachment. Safe filters can still attach after + * we execute the function in which this macro is used. + */ +#define SO_BLOCK_FALLBACK_SAFE(so, fn) \ + ASSERT(MUTEX_NOT_HELD(&(so)->so_lock)); \ + rw_enter(&(so)->so_fallback_rwlock, RW_READER); \ + if ((so)->so_state & SS_FALLBACK_COMP) { \ + rw_exit(&(so)->so_fallback_rwlock); \ + return (fn); \ + } else if (((so)->so_state & SS_FILOP_UNSF) == 0) { \ + mutex_enter(&(so)->so_lock); \ + (so)->so_state |= SS_FILOP_UNSF; \ + mutex_exit(&(so)->so_lock); \ + } + #define SO_UNBLOCK_FALLBACK(so) { \ rw_exit(&(so)->so_fallback_rwlock); \ } @@ -368,6 +394,7 @@ struct sonode { /* The modes below are only for non-streams sockets */ #define SM_ACCEPTSUPP 0x400 /* can handle accept() */ #define SM_SENDFILESUPP 0x800 /* Private: proto supp sendfile */ +#define SM_DEFERERR 0x1000 /* Private: defer so_error delivery */ /* * Socket versions. Used by the socket library when calling _so_socket(). @@ -946,6 +973,15 @@ extern struct sonode *socreate(struct sockparams *, int, int, int, int, extern int so_copyin(const void *, void *, size_t, int); extern int so_copyout(const void *, void *, size_t, int); +/* + * Functions to manipulate the use of direct receive callbacks. This should not + * be used outside of sockfs and ksocket. These are generally considered a use + * once interface for a socket and will cause all outstanding data on the socket + * to be flushed. + */ +extern int so_krecv_set(sonode_t *, so_krecv_f, void *); +extern void so_krecv_unblock(sonode_t *); + #endif /* diff --git a/usr/src/uts/common/sys/sockfilter.h b/usr/src/uts/common/sys/sockfilter.h index 9f6d8b499b..c4dd6539de 100644 --- a/usr/src/uts/common/sys/sockfilter.h +++ b/usr/src/uts/common/sys/sockfilter.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_SOCKFILTER_H @@ -129,6 +130,15 @@ typedef struct sof_ops { #define SOF_VERSION 1 +/* + * Flag indicating that the filter module is safe to attach after bind, + * getsockname, getsockopt or setsockopt calls. By default filters are unsafe + * so may not be attached after any socket operation. However, a safe filter + * can still be attached after one of the above calls. This makes attaching + * the filter less dependent on the initial socket setup order. + */ +#define SOF_ATT_SAFE 0x1 + extern int sof_register(int, const char *, const sof_ops_t *, int); extern int sof_unregister(const char *); diff --git a/usr/src/uts/common/sys/squeue.h b/usr/src/uts/common/sys/squeue.h index f1bd429815..35e1cf64c7 100644 --- a/usr/src/uts/common/sys/squeue.h +++ b/usr/src/uts/common/sys/squeue.h @@ -29,6 +29,17 @@ extern "C" { #endif +/* + * Originally in illumos, we had an IP-centric view of the serialization queue + * abstraction. While that has useful properties, the implementation of squeues + * hardcodes various parts of the implementation of IP into it which makes it + * unsuitable for other consumers. To enable them, we created another interface, + * but opted not to port all of the functionality that IP uses in the form of + * ip_squeue.c As other consumers need the functionality that IP has in squeues, + * then we'll come up with more genericized methods and add that functionality + * to <sys/gsqueue.h>. Please do not continue to use this header. + */ + #include <sys/types.h> #include <sys/processor.h> #include <sys/stream.h> @@ -76,12 +87,13 @@ typedef enum { struct ip_recv_attr_s; extern void squeue_init(void); -extern squeue_t *squeue_create(clock_t, pri_t); +extern squeue_t *squeue_create(clock_t, pri_t, boolean_t); extern void squeue_bind(squeue_t *, processorid_t); extern void squeue_unbind(squeue_t *); extern void squeue_enter(squeue_t *, mblk_t *, mblk_t *, uint32_t, struct ip_recv_attr_s *, int, uint8_t); extern uintptr_t *squeue_getprivate(squeue_t *, sqprivate_t); +extern void squeue_destroy(squeue_t *); struct conn_s; extern int squeue_synch_enter(struct conn_s *, mblk_t *); diff --git a/usr/src/uts/common/sys/squeue_impl.h b/usr/src/uts/common/sys/squeue_impl.h index 22550886eb..d2418bbc15 100644 --- a/usr/src/uts/common/sys/squeue_impl.h +++ b/usr/src/uts/common/sys/squeue_impl.h @@ -117,6 +117,7 @@ struct squeue_s { squeue_set_t *sq_set; /* managed by squeue creator */ pri_t sq_priority; /* squeue thread priority */ + boolean_t sq_isip; /* use IP-centric features */ /* Keep the debug-only fields at the end of the structure */ #ifdef DEBUG @@ -165,6 +166,7 @@ struct squeue_s { #define SQS_POLL_RESTART_DONE 0x01000000 #define SQS_POLL_THR_QUIESCE 0x02000000 #define SQS_PAUSE 0x04000000 /* The squeue has been paused */ +#define SQS_EXIT 0x08000000 /* squeue is being torn down */ #define SQS_WORKER_THR_CONTROL \ (SQS_POLL_QUIESCE | SQS_POLL_RESTART | SQS_POLL_CLEANUP) diff --git a/usr/src/uts/common/sys/stream.h b/usr/src/uts/common/sys/stream.h index a04019a9ce..28289649dd 100644 --- a/usr/src/uts/common/sys/stream.h +++ b/usr/src/uts/common/sys/stream.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2015 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -628,16 +629,11 @@ struct stroptions { /* * Structure for rw (read/write) procedure calls. A pointer * to a struiod_t is passed as a parameter to the rwnext() call. - * - * Note: DEF_IOV_MAX is defined and used as it is in "fs/vncalls.c" - * as there isn't a formal definition of IOV_MAX ??? */ -#define DEF_IOV_MAX 16 - typedef struct struiod { mblk_t *d_mp; /* pointer to mblk (chain) */ uio_t d_uio; /* uio info */ - iovec_t d_iov[DEF_IOV_MAX]; /* iov referenced by uio */ + iovec_t *d_iov; /* iov referenced by uio */ } struiod_t; /* diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h index e365668f85..7d86565564 100644 --- a/usr/src/uts/common/sys/syscall.h +++ b/usr/src/uts/common/sys/syscall.h @@ -511,8 +511,8 @@ typedef struct { /* return values from system call */ #if !defined(_KERNEL) -extern int syscall(int, ...); -extern int __systemcall(sysret_t *, int, ...); +extern long syscall(int, ...); +extern long __systemcall(sysret_t *, int, ...); extern int __set_errno(int); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/sysevent.h b/usr/src/uts/common/sys/sysevent.h index 2b26441973..5dcaa69c6d 100644 --- a/usr/src/uts/common/sys/sysevent.h +++ b/usr/src/uts/common/sys/sysevent.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_SYSEVENT_H @@ -74,10 +75,12 @@ extern "C" { #define SE_KERN_PID 0 #define SUNW_VENDOR "SUNW" +#define ILLUMOS_VENDOR "ILLUMOS" #define SE_USR_PUB "usr:" #define SE_KERN_PUB "kern:" #define SUNW_KERN_PUB SUNW_VENDOR":"SE_KERN_PUB #define SUNW_USR_PUB SUNW_VENDOR":"SE_USR_PUB +#define ILLUMOS_KERN_PUB ILLUMOS_VENDOR":"SE_KERN_PUB /* * Event header and attribute value limits diff --git a/usr/src/uts/common/sys/sysevent/datalink.h b/usr/src/uts/common/sys/sysevent/datalink.h new file mode 100644 index 0000000000..592ef5bdde --- /dev/null +++ b/usr/src/uts/common/sys/sysevent/datalink.h @@ -0,0 +1,54 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _SYS_SYSEVENT_DATALINK_H +#define _SYS_SYSEVENT_DATALINK_H + +/* + * Datalink System Event payloads + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Event schema for EC_DATALINK_LINK_STATE + * + * Event Class - EC_DATALINK + * Event Sub-Class - EC_DATALINK_LINK_STATE + * + * Attribute Name - DATALINK_EV_LINK_NAME + * Attribute Type - SE_DATA_TYPE_STRING + * Attribute Value - [Name of the datalink] + * + * Attribute Name - DATALINK_EV_LINK_ID + * Attribute Type - SE_DATA_TYPE_INT32 + * Attribute Value - [datalink_id_t for the device] + * + * Attribute Name - DATALINK_EV_ZONE_ID + * Attribute Type - SE_DATA_TYPE_INT32 + * Attribute Value - [zoneid_t of the zone the datalink is in] + */ + +#define DATALINK_EV_LINK_NAME "link" +#define DATALINK_EV_LINK_ID "linkid" +#define DATALINK_EV_ZONE_ID "zone" + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_SYSEVENT_DATALINK_H */ diff --git a/usr/src/uts/common/sys/sysevent/eventdefs.h b/usr/src/uts/common/sys/sysevent/eventdefs.h index 9c6907a08a..b47b5e2545 100644 --- a/usr/src/uts/common/sys/sysevent/eventdefs.h +++ b/usr/src/uts/common/sys/sysevent/eventdefs.h @@ -265,9 +265,11 @@ extern "C" { #define ESC_ZFS_POOL_REGUID "ESC_ZFS_pool_reguid" /* - * datalink subclass definitions. + * datalink subclass definitions. Supporting attributes for datalink state found + * in sys/sysevent/datalink.h. */ #define ESC_DATALINK_PHYS_ADD "ESC_datalink_phys_add" /* new physical link */ +#define ESC_DATALINK_LINK_STATE "ESC_datalink_link_state" /* link state */ /* * VRRP subclass definitions. Supporting attributes (name/value paris) are diff --git a/usr/src/uts/common/sys/systrace.h b/usr/src/uts/common/sys/systrace.h index d43974451e..17e509d4d8 100644 --- a/usr/src/uts/common/sys/systrace.h +++ b/usr/src/uts/common/sys/systrace.h @@ -22,13 +22,12 @@ /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2014 Joyent, Inc. All rights reserved. */ #ifndef _SYS_SYSTRACE_H #define _SYS_SYSTRACE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/dtrace.h> #ifdef __cplusplus @@ -47,16 +46,18 @@ extern systrace_sysent_t *systrace_sysent; extern systrace_sysent_t *systrace_sysent32; extern void (*systrace_probe)(dtrace_id_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t, uintptr_t); + uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern void systrace_stub(dtrace_id_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t, uintptr_t, uintptr_t); + uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern int64_t dtrace_systrace_syscall(uintptr_t arg0, uintptr_t arg1, - uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5); + uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, + uintptr_t arg6, uintptr_t arg7); #ifdef _SYSCALL32_IMPL extern int64_t dtrace_systrace_syscall32(uintptr_t arg0, uintptr_t arg1, - uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5); + uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, + uintptr_t arg6, uintptr_t arg7); #endif #endif diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h index 09be20858d..889a7096cd 100644 --- a/usr/src/uts/common/sys/termios.h +++ b/usr/src/uts/common/sys/termios.h @@ -361,6 +361,24 @@ extern pid_t tcgetsid(int); #define TCSETSF (_TIOC|16) /* + * linux terminal ioctls we need to be aware of + */ +#define TIOCSETLD (_TIOC|123) /* set line discipline parms */ +#define TIOCGETLD (_TIOC|124) /* get line discipline parms */ + +/* + * The VMIN and VTIME and solaris overlap with VEOF and VEOL - This is + * perfectly legal except, linux expects them to be separate. So we keep + * them separately. + */ +struct lx_cc { + unsigned char veof; /* veof value */ + unsigned char veol; /* veol value */ + unsigned char vmin; /* vmin value */ + unsigned char vtime; /* vtime value */ +}; + +/* * NTP PPS ioctls */ #define TIOCGPPS (_TIOC|125) diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h index 188230d61e..53deb90e23 100644 --- a/usr/src/uts/common/sys/thread.h +++ b/usr/src/uts/common/sys/thread.h @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2015, Joyent, Inc. All rights reserved. + */ + #ifndef _SYS_THREAD_H #define _SYS_THREAD_H @@ -68,6 +72,8 @@ typedef struct ctxop { void (*free_op)(void *, int); /* function which frees the context */ void *arg; /* argument to above functions, ctx pointer */ struct ctxop *next; /* next context ops */ + hrtime_t save_ts; /* timestamp of last save */ + hrtime_t restore_ts; /* timestamp of last restore */ } ctxop_t; /* @@ -164,7 +170,7 @@ typedef struct _kthread { k_sigset_t t_sig; /* signals pending to this process */ k_sigset_t t_extsig; /* signals sent from another contract */ k_sigset_t t_hold; /* hold signal bit mask */ - k_sigset_t t_sigwait; /* sigtimedwait() is accepting these */ + k_sigset_t t_sigwait; /* sigtimedwait/sigfd accepting these */ struct _kthread *t_forw; /* process's forward thread link */ struct _kthread *t_back; /* process's backward thread link */ struct _kthread *t_thlink; /* tid (lwpid) lookup hash link */ @@ -365,7 +371,7 @@ typedef struct _kthread { #define T_WOULDBLOCK 0x0020 /* for lockfs */ #define T_DONTBLOCK 0x0040 /* for lockfs */ #define T_DONTPEND 0x0080 /* for lockfs */ -#define T_SYS_PROF 0x0100 /* profiling on for duration of system call */ +#define T_SPLITSTK 0x0100 /* kernel stack is currently split */ #define T_WAITCVSEM 0x0200 /* waiting for a lwp_cv or lwp_sema on sleepq */ #define T_WATCHPT 0x0400 /* thread undergoing a watchpoint emulation */ #define T_PANIC 0x0800 /* thread initiated a system panic */ @@ -413,8 +419,9 @@ typedef struct _kthread { #define TS_RESUME 0x1000 /* setrun() by CPR resume process */ #define TS_CREATE 0x2000 /* setrun() by syslwp_create() */ #define TS_RUNQMATCH 0x4000 /* exact run queue balancing by setbackdq() */ +#define TS_BSTART 0x8000 /* setrun() by brand */ #define TS_ALLSTART \ - (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE) + (TS_CSTART|TS_UNPAUSE|TS_XSTART|TS_PSTART|TS_RESUME|TS_CREATE|TS_BSTART) #define TS_ANYWAITQ (TS_PROJWAITQ|TS_ZONEWAITQ) /* @@ -442,6 +449,10 @@ typedef struct _kthread { #define ISTOPPED(t) ((t)->t_state == TS_STOPPED && \ !((t)->t_schedflag & TS_PSTART)) +/* True if thread is stopped for a brand-specific reason */ +#define BSTOPPED(t) ((t)->t_state == TS_STOPPED && \ + !((t)->t_schedflag & TS_BSTART)) + /* True if thread is asleep and wakeable */ #define ISWAKEABLE(t) (((t)->t_state == TS_SLEEP && \ ((t)->t_flag & T_WAKEABLE))) diff --git a/usr/src/uts/common/sys/uadmin.h b/usr/src/uts/common/sys/uadmin.h index d5168c9b2c..c14a3bf11e 100644 --- a/usr/src/uts/common/sys/uadmin.h +++ b/usr/src/uts/common/sys/uadmin.h @@ -23,6 +23,7 @@ * * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -159,7 +160,7 @@ extern kmutex_t ualock; extern void mdboot(int, int, char *, boolean_t); extern void mdpreboot(int, int, char *); extern int kadmin(int, int, void *, cred_t *); -extern void killall(zoneid_t); +extern void killall(zoneid_t, boolean_t); #endif extern int uadmin(int, int, uintptr_t); diff --git a/usr/src/uts/common/sys/uio.h b/usr/src/uts/common/sys/uio.h index e803efeb45..5663929bfb 100644 --- a/usr/src/uts/common/sys/uio.h +++ b/usr/src/uts/common/sys/uio.h @@ -145,7 +145,8 @@ typedef struct uioa_s { */ typedef enum xuio_type { UIOTYPE_ASYNCIO, - UIOTYPE_ZEROCOPY + UIOTYPE_ZEROCOPY, + UIOTYPE_PEEKSIZE } xuio_type_t; typedef struct xuio { @@ -175,6 +176,15 @@ typedef struct xuio { int xu_zc_rw; /* read or write buffer */ void *xu_zc_priv; /* fs specific */ } xu_zc; + + /* + * Peek Size Support -- facilitate peeking at the size of a + * waiting message on a socket. + */ + struct { + ssize_t xu_ps_size; /* size of waiting msg */ + boolean_t xu_ps_set; /* was size calculated? */ + } xu_ps; } xu_ext; } xuio_t; diff --git a/usr/src/uts/common/sys/vm_usage.h b/usr/src/uts/common/sys/vm_usage.h index 1aa4a8ee6d..c2954cbc29 100644 --- a/usr/src/uts/common/sys/vm_usage.h +++ b/usr/src/uts/common/sys/vm_usage.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2014 Joyent, Inc. All rights reserved. */ #ifndef _SYS_VM_USAGE_H @@ -79,8 +80,9 @@ extern "C" { /* zoneid */ #define VMUSAGE_COL_EUSERS 0x2000 /* same as VMUSAGE_COL_RUSERS, but by */ /* euser */ +#define VMUSAGE_A_ZONE 0x4000 /* rss/swap for a specified zone */ -#define VMUSAGE_MASK 0x3fff /* all valid flags for getvmusage() */ +#define VMUSAGE_MASK 0x7fff /* all valid flags for getvmusage() */ typedef struct vmusage { id_t vmu_zoneid; /* zoneid, or ALL_ZONES for */ @@ -108,6 +110,7 @@ extern int getvmusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres); int vm_getusage(uint_t, time_t, vmusage_t *, size_t *, int); void vm_usage_init(); +int vm_map_inval(pid_t, caddr_t, size_t); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/vmsystm.h b/usr/src/uts/common/sys/vmsystm.h index 6122b6cd2f..c7b41730b6 100644 --- a/usr/src/uts/common/sys/vmsystm.h +++ b/usr/src/uts/common/sys/vmsystm.h @@ -19,6 +19,9 @@ * CDDL HEADER END */ /* + * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ +/* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -159,6 +162,8 @@ extern void *boot_virt_alloc(void *addr, size_t size); extern size_t exec_get_spslew(void); +extern caddr_t map_userlimit(proc_t *pp, struct as *as, int flags); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/vnd.h b/usr/src/uts/common/sys/vnd.h new file mode 100644 index 0000000000..bc7c9c3122 --- /dev/null +++ b/usr/src/uts/common/sys/vnd.h @@ -0,0 +1,141 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_VND_H +#define _SYS_VND_H + +#include <sys/types.h> +#include <sys/vnd_errno.h> +#include <sys/frameio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * We distinguish between normal ioctls and private ioctls we issues to out + * streams version. Streams ioctls have the upper bit set in the lowest byte. + * Note that there are no STREAMs ioctls for userland and all definitions + * related to them are not present in this file. + */ +#define VND_IOC (('v' << 24) | ('n' << 16) | ('d' << 8)) + +/* + * Attach the current minor instance to a given dlpi datalink identified by a + * vnd_ioc_name_t argument. This fails if it's already been attached. Note that + * unlike the other ioctls, this is passed directly as opposed to every other + * function which is passed as a pointer to the value. + */ +#define VND_IOC_ATTACH (VND_IOC | 0x1) + +#define VND_NAMELEN 32 + +typedef struct vnd_ioc_attach { + char via_name[VND_NAMELEN]; + zoneid_t via_zoneid; + uint32_t via_errno; +} vnd_ioc_attach_t; + +/* + * Link the current minor instance into the /devices name space. + * + * This ioctl adds entries into /devices with a name of the form z%d:%s vil_zid, + * vil_name. The device will be namespaced to the zone. The global zone will be + * able to see all minor nodes. In the zone, only the /dev entries will exist. + * At this time, a given device can only have one link at a time. Note that a + * user cannot specify the zone to pass in, rather it is the zone that the + * device was attached in. + */ +#define VND_IOC_LINK (VND_IOC | 0x2) + +typedef struct vnd_ioc_link { + char vil_name[VND_NAMELEN]; + uint32_t vil_errno; +} vnd_ioc_link_t; + +/* + * Unlink the opened minor instance from the /devices name space. A zone may use + * this to unlink an extent entry in /dev; however, they will not be able to + * link it in again. + */ +#define VND_IOC_UNLINK (VND_IOC | 0x3) +typedef struct vnd_ioc_unlink { + uint32_t viu_errno; +} vnd_ioc_unlink_t; + +/* + * Controls to get and set the current buffer recieve buffer size. + */ +typedef struct vnd_ioc_buf { + uint64_t vib_size; + uint32_t vib_filler; + uint32_t vib_errno; +} vnd_ioc_buf_t; + +#define VND_IOC_GETRXBUF (VND_IOC | 0x04) +#define VND_IOC_SETRXBUF (VND_IOC | 0x05) +#define VND_IOC_GETMAXBUF (VND_IOC | 0x06) +#define VND_IOC_GETTXBUF (VND_IOC | 0x07) +#define VND_IOC_SETTXBUF (VND_IOC | 0x08) +#define VND_IOC_GETMINTU (VND_IOC | 0x09) +#define VND_IOC_GETMAXTU (VND_IOC | 0x0a) + +/* + * Information and listing ioctls + * + * This gets information about all of the active vnd instances. vl_actents is + * always updated to the number around and vl_nents is the number of + * vnd_ioc_info_t elements are allocated in vl_ents. + */ +typedef struct vnd_ioc_info { + uint32_t vii_version; + zoneid_t vii_zone; + char vii_name[VND_NAMELEN]; + char vii_datalink[VND_NAMELEN]; +} vnd_ioc_info_t; + +typedef struct vnd_ioc_list { + uint_t vl_nents; + uint_t vl_actents; + vnd_ioc_info_t *vl_ents; +} vnd_ioc_list_t; + +#ifdef _KERNEL + +typedef struct vnd_ioc_list32 { + uint_t vl_nents; + uint_t vl_actents; + caddr32_t vl_ents; +} vnd_ioc_list32_t; + +#endif /* _KERNEL */ + +#define VND_IOC_LIST (VND_IOC | 0x20) + +/* + * Framed I/O ioctls + * + * Users should use the standard frameio_t as opposed to a vnd specific type. + * This is a consolidation private ioctl pending futher stability in the form of + * specific system work. + */ +#define VND_IOC_FRAMEIO_READ (VND_IOC | 0x30) +#define VND_IOC_FRAMEIO_WRITE (VND_IOC | 0x31) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VND_H */ diff --git a/usr/src/uts/common/sys/vnd_errno.h b/usr/src/uts/common/sys/vnd_errno.h new file mode 100644 index 0000000000..89e5fc2543 --- /dev/null +++ b/usr/src/uts/common/sys/vnd_errno.h @@ -0,0 +1,72 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright (c) 2014 Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_VND_ERRNO_H +#define _SYS_VND_ERRNO_H + +/* + * This header contains all of the available vnd errors. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum vnd_errno { + VND_E_SUCCESS = 0, /* no error */ + VND_E_NOMEM, /* no memory */ + VND_E_NODATALINK, /* no such datalink */ + VND_E_NOTETHER, /* not DL_ETHER */ + VND_E_DLPIINVAL, /* Unknown DLPI failures */ + VND_E_ATTACHFAIL, /* DL_ATTACH_REQ failed */ + VND_E_BINDFAIL, /* DL_BIND_REQ failed */ + VND_E_PROMISCFAIL, /* DL_PROMISCON_REQ failed */ + VND_E_DIRECTFAIL, /* DLD_CAPAB_DIRECT enable failed */ + VND_E_CAPACKINVAL, /* bad dl_capability_ack_t */ + VND_E_SUBCAPINVAL, /* bad dl_capability_sub_t */ + VND_E_DLDBADVERS, /* bad dld version */ + VND_E_KSTATCREATE, /* failed to create kstats */ + VND_E_NODEV, /* no such vnd link */ + VND_E_NONETSTACK, /* netstack doesn't exist */ + VND_E_ASSOCIATED, /* device already associated */ + VND_E_ATTACHED, /* device already attached */ + VND_E_LINKED, /* device already linked */ + VND_E_BADNAME, /* invalid name */ + VND_E_PERM, /* can't touch this */ + VND_E_NOZONE, /* no such zone */ + VND_E_STRINIT, /* failed to initialize vnd stream module */ + VND_E_NOTATTACHED, /* device not attached */ + VND_E_NOTLINKED, /* device not linked */ + VND_E_LINKEXISTS, /* another device has the same link name */ + VND_E_MINORNODE, /* failed to create minor node */ + VND_E_BUFTOOBIG, /* requested buffer size is too large */ + VND_E_BUFTOOSMALL, /* requested buffer size is too small */ + VND_E_DLEXCL, /* unable to get dlpi excl access */ + VND_E_DIRECTNOTSUP, + /* DLD direct capability not suported over data link */ + VND_E_BADPROPSIZE, /* invalid property size */ + VND_E_BADPROP, /* invalid property */ + VND_E_PROPRDONLY, /* property is read only */ + VND_E_SYS, /* unexpected system error */ + VND_E_CAPABPASS, + /* capabilities invalid, pass-through module detected */ + VND_E_UNKNOWN /* unknown error */ +} vnd_errno_t; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VND_ERRNO_H */ diff --git a/usr/src/uts/common/sys/vnic_impl.h b/usr/src/uts/common/sys/vnic_impl.h index 7e50091347..1a91158da6 100644 --- a/usr/src/uts/common/sys/vnic_impl.h +++ b/usr/src/uts/common/sys/vnic_impl.h @@ -21,7 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. - * Copyright 2014 Joyent, Inc. All rights reserved. + * Copyright 2015 Joyent, Inc. */ #ifndef _SYS_VNIC_IMPL_H @@ -65,6 +65,7 @@ typedef struct vnic_s { uint32_t vn_hcksum_txflags; uint32_t vn_mtu; + link_state_t vn_ls; } vnic_t; #define vn_mch vn_mc_handles[0] diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h index af9516fe52..9bcff72268 100644 --- a/usr/src/uts/common/sys/vnode.h +++ b/usr/src/uts/common/sys/vnode.h @@ -21,7 +21,7 @@ /* * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Copyright (c) 2015, Joyent, Inc. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -724,7 +724,12 @@ typedef enum symfollow symfollow_t; typedef enum vcexcl vcexcl_t; typedef enum create create_t; -/* Vnode Events - Used by VOP_VNEVENT */ +/* + * Vnode Events - Used by VOP_VNEVENT + * The VE_PRE_RENAME_* events fire before the rename operation and are + * primarily used for specialized applications, such as NFSv4 delegation, which + * need to know about rename before it occurs. + */ typedef enum vnevent { VE_SUPPORT = 0, /* Query */ VE_RENAME_SRC = 1, /* Rename, with vnode as source */ @@ -733,9 +738,13 @@ typedef enum vnevent { VE_RMDIR = 4, /* Remove of directory vnode's name */ VE_CREATE = 5, /* Create with vnode's name which exists */ VE_LINK = 6, /* Link with vnode's name as source */ - VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */ + VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */ VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */ - VE_TRUNCATE = 9 /* Truncate */ + VE_TRUNCATE = 9, /* Truncate */ + VE_PRE_RENAME_SRC = 10, /* Pre-rename, with vnode as source */ + VE_PRE_RENAME_DEST = 11, /* Pre-rename, with vnode as target/dest. */ + VE_PRE_RENAME_DEST_DIR = 12, /* Pre-rename with vnode as target dir */ + VE_RENAME_SRC_DIR = 13 /* Rename with vnode as source dir */ } vnevent_t; /* @@ -1290,10 +1299,17 @@ void vnevent_remove(vnode_t *, vnode_t *, char *, caller_context_t *); void vnevent_rmdir(vnode_t *, vnode_t *, char *, caller_context_t *); void vnevent_create(vnode_t *, caller_context_t *); void vnevent_link(vnode_t *, caller_context_t *); -void vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct); +void vnevent_rename_dest_dir(vnode_t *, vnode_t *, char *, + caller_context_t *ct); void vnevent_mountedover(vnode_t *, caller_context_t *); void vnevent_truncate(vnode_t *, caller_context_t *); int vnevent_support(vnode_t *, caller_context_t *); +void vnevent_pre_rename_src(vnode_t *, vnode_t *, char *, + caller_context_t *); +void vnevent_pre_rename_dest(vnode_t *, vnode_t *, char *, + caller_context_t *); +void vnevent_pre_rename_dest_dir(vnode_t *, vnode_t *, char *, + caller_context_t *ct); /* Vnode specific data */ void vsd_create(uint_t *, void (*)(void *)); @@ -1323,6 +1339,9 @@ u_longlong_t fs_new_caller_id(); int vn_vmpss_usepageio(vnode_t *); +/* Empty v_path placeholder */ +extern char *vn_vpath_empty; + /* * Needed for use of IS_VMODSORT() in kernel. */ diff --git a/usr/src/uts/common/sys/vxlan.h b/usr/src/uts/common/sys/vxlan.h new file mode 100644 index 0000000000..d87786b507 --- /dev/null +++ b/usr/src/uts/common/sys/vxlan.h @@ -0,0 +1,47 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _SYS_VXLAN_H +#define _SYS_VXLAN_H + +/* + * Common VXLAN information + */ + +#include <sys/inttypes.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* Sizes in bytes */ +#define VXLAN_HDR_LEN 8 +#define VXLAN_ID_LEN 3 + +#define VXLAN_F_VDI 0x08000000 +#define VXLAN_ID_SHIFT 8 + +#pragma pack(1) +typedef struct vxlan_hdr { + uint32_t vxlan_flags; + uint32_t vxlan_id; +} vxlan_hdr_t; +#pragma pack() + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_VXLAN_H */ diff --git a/usr/src/uts/common/sys/zfd.h b/usr/src/uts/common/sys/zfd.h new file mode 100644 index 0000000000..e08d75ecba --- /dev/null +++ b/usr/src/uts/common/sys/zfd.h @@ -0,0 +1,78 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _SYS_ZFD_H +#define _SYS_ZFD_H + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Minor node name of the global zone side (often called the "master" side) + * of the zfd dev. + */ +#define ZFD_MASTER_NAME "master" + +/* + * Minor node name of the non-global zone side (often called the "slave" + * side) of the zfd dev. + */ +#define ZFD_SLAVE_NAME "slave" + +#define ZFD_NAME_LEN 16 + +/* + * ZFD_IOC forms the base for all zfd ioctls. + */ +#define ZFD_IOC (('Z' << 24) | ('f' << 16) | ('d' << 8)) + +/* + * This ioctl tells the slave side it should push the TTY stream modules + * so that the fd looks like a tty. + */ +#define ZFD_MAKETTY (ZFD_IOC | 0) + +/* + * This ioctl puts a hangup into the stream so that the slave side sees EOF. + */ +#define ZFD_EOF (ZFD_IOC | 1) + +/* + * This ioctl succeeds if the slave side is open. + */ +#define ZFD_HAS_SLAVE (ZFD_IOC | 2) + +/* + * This ioctl links two streams into a multiplexer configuration for in-zone + * logging. + */ +#define ZFD_MUX (ZFD_IOC | 3) + +/* + * This ioctl controls the flow control setting for the log multiplexer stream + * (1 = true, 0 = false). The default is false which implies teeing into the + * log stream is "best-effort" but data will be discarded if the stream + * becomes full. If set and the log stream begins to fill up, the primary + * stream will stop flowing. + */ +#define ZFD_MUX_FLOWCON (ZFD_IOC | 4) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFD_H */ diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 7aab7c8d32..af8bd3718d 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -23,6 +23,7 @@ * Copyright 2013, Joyent, Inc. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. * Copyright 2014 Igor Kozhukhov <ikozhukhov@gmail.com>. + * Copyright 2015, Joyent, Inc. All rights reserved. */ #ifndef _SYS_ZONE_H @@ -97,13 +98,19 @@ extern "C" { #define ZONE_ATTR_INITNAME 9 #define ZONE_ATTR_BOOTARGS 10 #define ZONE_ATTR_BRAND 11 -#define ZONE_ATTR_PHYS_MCAP 12 +#define ZONE_ATTR_PMCAP_NOVER 12 #define ZONE_ATTR_SCHED_CLASS 13 #define ZONE_ATTR_FLAGS 14 #define ZONE_ATTR_HOSTID 15 #define ZONE_ATTR_FS_ALLOWED 16 #define ZONE_ATTR_NETWORK 17 +#define ZONE_ATTR_DID 18 +#define ZONE_ATTR_PMCAP_PAGEOUT 19 #define ZONE_ATTR_INITNORESTART 20 +#define ZONE_ATTR_PG_FLT_DELAY 21 +#define ZONE_ATTR_RSS 22 +#define ZONE_ATTR_APP_SVC_CT 23 +#define ZONE_ATTR_SCHED_FIXEDHI 24 /* Start of the brand-specific attribute namespace */ #define ZONE_ATTR_BRAND_ATTRS 32768 @@ -184,6 +191,7 @@ typedef struct { uint32_t doi; /* DOI for label */ caddr32_t label; /* label associated with zone */ int flags; + zoneid_t zoneid; /* requested zoneid */ } zone_def32; #endif typedef struct { @@ -200,6 +208,7 @@ typedef struct { uint32_t doi; /* DOI for label */ const bslabel_t *label; /* label associated with zone */ int flags; + zoneid_t zoneid; /* requested zoneid */ } zone_def; /* extended error information */ @@ -244,9 +253,12 @@ typedef enum zone_cmd { typedef struct zone_cmd_arg { uint64_t uniqid; /* unique "generation number" */ zone_cmd_t cmd; /* requested action */ - uint32_t _pad; /* need consistent 32/64 bit alignmt */ + int status; /* init status on shutdown */ + uint32_t debug; /* enable brand hook debug */ char locale[MAXPATHLEN]; /* locale in which to render messages */ char bootbuf[BOOTARGS_MAX]; /* arguments passed to zone_boot() */ + /* Needed for 32/64 zoneadm -> zoneadmd door arg size check. */ + int pad; } zone_cmd_arg_t; /* @@ -372,7 +384,7 @@ typedef struct zone_dataset { } zone_dataset_t; /* - * structure for zone kstats + * structure for rctl zone kstats */ typedef struct zone_kstat { kstat_named_t zk_zonename; @@ -383,6 +395,58 @@ typedef struct zone_kstat { struct cpucap; typedef struct { + hrtime_t cycle_start; + uint_t cycle_cnt; + hrtime_t zone_avg_cnt; +} sys_zio_cntr_t; + +typedef struct { + kstat_named_t zv_zonename; + kstat_named_t zv_nread; + kstat_named_t zv_reads; + kstat_named_t zv_rtime; + kstat_named_t zv_rlentime; + kstat_named_t zv_nwritten; + kstat_named_t zv_writes; + kstat_named_t zv_wtime; + kstat_named_t zv_wlentime; + kstat_named_t zv_10ms_ops; + kstat_named_t zv_100ms_ops; + kstat_named_t zv_1s_ops; + kstat_named_t zv_10s_ops; + kstat_named_t zv_delay_cnt; + kstat_named_t zv_delay_time; +} zone_vfs_kstat_t; + +typedef struct { + kstat_named_t zz_zonename; + kstat_named_t zz_nread; + kstat_named_t zz_reads; + kstat_named_t zz_rtime; + kstat_named_t zz_rlentime; + kstat_named_t zz_nwritten; + kstat_named_t zz_writes; + kstat_named_t zz_waittime; +} zone_zfs_kstat_t; + +typedef struct { + kstat_named_t zm_zonename; + kstat_named_t zm_rss; + kstat_named_t zm_phys_cap; + kstat_named_t zm_swap; + kstat_named_t zm_swap_cap; + kstat_named_t zm_nover; + kstat_named_t zm_pagedout; + kstat_named_t zm_pgpgin; + kstat_named_t zm_anonpgin; + kstat_named_t zm_execpgin; + kstat_named_t zm_fspgin; + kstat_named_t zm_anon_alloc_fail; + kstat_named_t zm_pf_throttle; + kstat_named_t zm_pf_throttle_usec; +} zone_mcap_kstat_t; + +typedef struct { kstat_named_t zm_zonename; /* full name, kstat truncates name */ kstat_named_t zm_utime; kstat_named_t zm_stime; @@ -390,15 +454,13 @@ typedef struct { kstat_named_t zm_avenrun1; kstat_named_t zm_avenrun5; kstat_named_t zm_avenrun15; - kstat_named_t zm_run_ticks; - kstat_named_t zm_run_wait; - kstat_named_t zm_fss_shr_pct; - kstat_named_t zm_fss_pri_hi; - kstat_named_t zm_fss_pri_avg; kstat_named_t zm_ffcap; kstat_named_t zm_ffnoproc; kstat_named_t zm_ffnomem; kstat_named_t zm_ffmisc; + kstat_named_t zm_nested_intp; + kstat_named_t zm_init_pid; + kstat_named_t zm_boot_time; } zone_misc_kstat_t; typedef struct zone { @@ -440,6 +502,7 @@ typedef struct zone { */ list_node_t zone_linkage; zoneid_t zone_id; /* ID of zone */ + zoneid_t zone_did; /* persistent debug ID of zone */ uint_t zone_ref; /* count of zone_hold()s on zone */ uint_t zone_cred_ref; /* count of zone_hold_cred()s on zone */ /* @@ -492,10 +555,11 @@ typedef struct zone { kcondvar_t zone_cv; /* used to signal state changes */ struct proc *zone_zsched; /* Dummy kernel "zsched" process */ pid_t zone_proc_initpid; /* pid of "init" for this zone */ - char *zone_initname; /* fs path to 'init' */ + char *zone_initname; /* fs path to 'init' */ + int zone_init_status; /* init's exit status */ int zone_boot_err; /* for zone_boot() if boot fails */ char *zone_bootargs; /* arguments passed via zone_boot() */ - uint64_t zone_phys_mcap; /* physical memory cap */ + rctl_qty_t zone_phys_mem_ctl; /* current phys. memory limit */ /* * zone_kthreads is protected by zone_status_lock. */ @@ -533,9 +597,12 @@ typedef struct zone { tsol_mlp_list_t zone_mlps; /* MLPs on zone-private addresses */ boolean_t zone_restart_init; /* Restart init if it dies? */ + boolean_t zone_reboot_on_init_exit; /* Reboot if init dies? */ + boolean_t zone_setup_app_contract; /* setup contract? */ struct brand *zone_brand; /* zone's brand */ void *zone_brand_data; /* store brand specific data */ id_t zone_defaultcid; /* dflt scheduling class id */ + boolean_t zone_fixed_hipri; /* fixed sched. hi prio */ kstat_t *zone_swapresv_kstat; kstat_t *zone_lockedmem_kstat; /* @@ -544,6 +611,37 @@ typedef struct zone { list_t zone_dl_list; netstack_t *zone_netstack; struct cpucap *zone_cpucap; /* CPU caps data */ + + /* + * Data and counters used for ZFS fair-share disk IO. + */ + rctl_qty_t zone_zfs_io_pri; /* ZFS IO priority */ + uint_t zone_zfs_queued[2]; /* sync I/O enqueued count */ + uint64_t zone_zfs_weight; /* used to prevent starvation */ + uint64_t zone_io_util; /* IO utilization metric */ + boolean_t zone_io_util_above_avg; /* IO util percent > avg. */ + uint16_t zone_io_delay; /* IO delay on logical r/w */ + kmutex_t zone_stg_io_lock; /* protects IO window data */ + sys_zio_cntr_t zone_rd_ops; /* Counters for ZFS reads, */ + sys_zio_cntr_t zone_wr_ops; /* writes and */ + sys_zio_cntr_t zone_lwr_ops; /* logical writes. */ + + /* + * kstats and counters for VFS ops and bytes. + */ + kmutex_t zone_vfs_lock; /* protects VFS statistics */ + kstat_t *zone_vfs_ksp; + kstat_io_t zone_vfs_rwstats; + zone_vfs_kstat_t *zone_vfs_stats; + + /* + * kstats for ZFS I/O ops and bytes. + */ + kmutex_t zone_zfs_lock; /* protects ZFS statistics */ + kstat_t *zone_zfs_ksp; + kstat_io_t zone_zfs_rwstats; + zone_zfs_kstat_t *zone_zfs_stats; + /* * Solaris Auditing per-zone audit context */ @@ -563,6 +661,27 @@ typedef struct zone { kstat_t *zone_nprocs_kstat; /* + * kstats and counters for physical memory capping. + */ + rctl_qty_t zone_phys_mem; /* current bytes of phys. mem. (RSS) */ + kstat_t *zone_physmem_kstat; + uint64_t zone_mcap_nover; /* # of times over phys. cap */ + uint64_t zone_mcap_pagedout; /* bytes of mem. paged out */ + kmutex_t zone_mcap_lock; /* protects mcap statistics */ + kstat_t *zone_mcap_ksp; + zone_mcap_kstat_t *zone_mcap_stats; + uint64_t zone_pgpgin; /* pages paged in */ + uint64_t zone_anonpgin; /* anon pages paged in */ + uint64_t zone_execpgin; /* exec pages paged in */ + uint64_t zone_fspgin; /* fs pages paged in */ + uint64_t zone_anon_alloc_fail; /* cnt of anon alloc fails */ + uint64_t zone_pf_throttle; /* cnt of page flt throttles */ + uint64_t zone_pf_throttle_usec; /* time of page flt throttles */ + + /* Num usecs to throttle page fault when zone is over phys. mem cap */ + uint32_t zone_pg_flt_delay; + + /* * Misc. kstats and counters for zone cpu-usage aggregation. * The zone_Xtime values are the sum of the micro-state accounting * values for all threads that are running or have run in the zone. @@ -586,6 +705,8 @@ typedef struct zone { uint32_t zone_ffnomem; /* as_dup/memory error */ uint32_t zone_ffmisc; /* misc. other error */ + uint32_t zone_nested_intp; /* nested interp. kstat */ + struct loadavg_s zone_loadavg; /* loadavg for this zone */ uint64_t zone_hp_avenrun[3]; /* high-precision avenrun */ int zone_avenrun[3]; /* FSCALED avg. run queue len */ @@ -640,6 +761,7 @@ extern zone_t *zone_find_by_name(char *); extern zone_t *zone_find_by_any_path(const char *, boolean_t); extern zone_t *zone_find_by_path(const char *); extern zoneid_t getzoneid(void); +extern zoneid_t getzonedid(void); extern zone_t *zone_find_by_id_nolock(zoneid_t); extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *); extern int zone_check_datalink(zoneid_t *, datalink_id_t); @@ -834,6 +956,7 @@ extern int zone_walk(int (*)(zone_t *, void *), void *); extern rctl_hndl_t rc_zone_locked_mem; extern rctl_hndl_t rc_zone_max_swap; +extern rctl_hndl_t rc_zone_phys_mem; extern rctl_hndl_t rc_zone_max_lofi; #endif /* _KERNEL */ |