diff options
Diffstat (limited to 'usr/src/uts/common/sys')
33 files changed, 1115 insertions, 485 deletions
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index 271682bc67..50846d0cb3 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -20,6 +20,7 @@ # # # Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2012, Joyent, Inc. All rights reserved. # include $(SRC)/uts/Makefile.uts @@ -277,6 +278,7 @@ CHKHDRS= \ ipc.h \ ipc_impl.h \ ipc_rctl.h \ + ipd.h \ ipmi.h \ isa_defs.h \ iscsi_authclient.h \ @@ -856,6 +858,8 @@ FSHDRS= \ hsfs_rrip.h \ hsfs_spec.h \ hsfs_susp.h \ + hyprlofs.h \ + hyprlofs_info.h \ lofs_info.h \ lofs_node.h \ mntdata.h \ diff --git a/usr/src/uts/common/sys/aggr_impl.h b/usr/src/uts/common/sys/aggr_impl.h index 8363d231cf..e0cfd6f778 100644 --- a/usr/src/uts/common/sys/aggr_impl.h +++ b/usr/src/uts/common/sys/aggr_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. */ #ifndef _SYS_AGGR_IMPL_H @@ -307,6 +308,8 @@ extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *); extern void aggr_port_init_callbacks(aggr_port_t *); extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t); +extern void aggr_recv_promisc_cb(void *, mac_resource_handle_t, mblk_t *, + boolean_t); extern void aggr_tx_ring_update(void *, uintptr_t); extern void aggr_tx_notify_thread(void *); diff --git a/usr/src/uts/common/sys/blkdev.h b/usr/src/uts/common/sys/blkdev.h index 2307610bae..4ec50fbf3b 100644 --- a/usr/src/uts/common/sys/blkdev.h +++ b/usr/src/uts/common/sys/blkdev.h @@ -19,6 +19,7 @@ * CDDL HEADER END */ /* + * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ @@ -116,6 +117,7 @@ struct bd_media { uint64_t m_nblks; uint32_t m_blksize; boolean_t m_readonly; + boolean_t m_solidstate; }; #define BD_INFO_FLAG_REMOVABLE (1U << 0) diff --git a/usr/src/uts/common/sys/buf.h b/usr/src/uts/common/sys/buf.h index a9191aed7c..cb8a6012fc 100644 --- a/usr/src/uts/common/sys/buf.h +++ b/usr/src/uts/common/sys/buf.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -186,6 +187,7 @@ struct biostats { #define B_STARTED 0x2000000 /* io:::start probe called for buf */ #define B_ABRWRITE 0x4000000 /* Application based recovery active */ #define B_PAGE_NOWAIT 0x8000000 /* Skip the page if it is locked */ +#define B_INVALCURONLY 0x10000000 /* invalidate only for curproc */ /* * There is some confusion over the meaning of B_FREE and B_INVAL and what @@ -198,6 +200,12 @@ struct biostats { * between the sole use of these two flags. In both cases, IO will be done * if the page is not yet committed to storage. * + * The B_INVALCURONLY flag modifies the behavior of the B_INVAL flag and is + * intended to be used in conjunction with B_INVAL. B_INVALCURONLY has no + * meaning on its own. When both B_INVALCURONLY and B_INVAL are set, then + * the mapping for the page is only invalidated for the current process. + * In this case, the page is not destroyed unless this was the final mapping. + * * In order to discard pages without writing them back, (B_INVAL | B_TRUNC) * should be used. * diff --git a/usr/src/uts/common/sys/cpucaps.h b/usr/src/uts/common/sys/cpucaps.h index 6063ff4380..6bc042108c 100644 --- a/usr/src/uts/common/sys/cpucaps.h +++ b/usr/src/uts/common/sys/cpucaps.h @@ -22,6 +22,7 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011, 2012, Joyent, Inc. All rights reserved. 
*/ #ifndef _SYS_CPUCAPS_H @@ -84,12 +85,16 @@ extern void cpucaps_zone_remove(zone_t *); */ extern int cpucaps_project_set(kproject_t *, rctl_qty_t); extern int cpucaps_zone_set(zone_t *, rctl_qty_t); +extern int cpucaps_zone_set_base(zone_t *, rctl_qty_t); +extern int cpucaps_zone_set_burst_time(zone_t *, rctl_qty_t); /* * Get current CPU usage for a project/zone. */ extern rctl_qty_t cpucaps_project_get(kproject_t *); extern rctl_qty_t cpucaps_zone_get(zone_t *); +extern rctl_qty_t cpucaps_zone_get_base(zone_t *); +extern rctl_qty_t cpucaps_zone_get_burst_time(zone_t *); /* * Scheduling class hooks into CPU caps framework. diff --git a/usr/src/uts/common/sys/cpucaps_impl.h b/usr/src/uts/common/sys/cpucaps_impl.h index 95afd21827..2cd4ed644d 100644 --- a/usr/src/uts/common/sys/cpucaps_impl.h +++ b/usr/src/uts/common/sys/cpucaps_impl.h @@ -22,6 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011, 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_CPUCAPS_IMPL_H @@ -66,8 +67,12 @@ typedef struct cpucap { waitq_t cap_waitq; /* waitq for capped threads */ kstat_t *cap_kstat; /* cpucaps specific kstat */ int64_t cap_gen; /* zone cap specific */ + hrtime_t cap_chk_value; /* effective CPU usage cap */ hrtime_t cap_value; /* scaled CPU usage cap */ hrtime_t cap_usage; /* current CPU usage */ + hrtime_t cap_base; /* base CPU for burst */ + u_longlong_t cap_burst_limit; /* max secs (in tics) for a burst */ + u_longlong_t cap_bursting; /* # of ticks currently bursting */ disp_lock_t cap_usagelock; /* protects cap_usage above */ /* * Per cap statistics. 
@@ -75,6 +80,7 @@ typedef struct cpucap { hrtime_t cap_maxusage; /* maximum cap usage */ u_longlong_t cap_below; /* # of ticks spend below the cap */ u_longlong_t cap_above; /* # of ticks spend above the cap */ + u_longlong_t cap_above_base; /* # of ticks spent above the base */ } cpucap_t; /* diff --git a/usr/src/uts/common/sys/cred.h b/usr/src/uts/common/sys/cred.h index 5056f9a511..914f132dc0 100644 --- a/usr/src/uts/common/sys/cred.h +++ b/usr/src/uts/common/sys/cred.h @@ -93,6 +93,7 @@ extern gid_t crgetgid(const cred_t *); extern gid_t crgetrgid(const cred_t *); extern gid_t crgetsgid(const cred_t *); extern zoneid_t crgetzoneid(const cred_t *); +extern zoneid_t crgetzonedid(const cred_t *); extern projid_t crgetprojid(const cred_t *); extern cred_t *crgetmapped(const cred_t *); diff --git a/usr/src/uts/common/sys/dkio.h b/usr/src/uts/common/sys/dkio.h index eb4ddf34fe..a5b0c312f9 100644 --- a/usr/src/uts/common/sys/dkio.h +++ b/usr/src/uts/common/sys/dkio.h @@ -23,6 +23,7 @@ * Copyright (c) 1982, 2010, Oracle and/or its affiliates. All rights reserved. * * Copyright 2011 Nexenta Systems, Inc. All rights reserved. + * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. */ #ifndef _SYS_DKIO_H @@ -237,6 +238,9 @@ struct dk_callback { #define DKIOCSETEXTPART (DKIOC|46) #endif +/* ioctl to report whether the disk is solid state or not - used for ZFS */ +#define DKIOCSOLIDSTATE (DKIOC|38) + /* * Ioctl to force driver to re-read the alternate partition and rebuild * the internal defect map. 
diff --git a/usr/src/uts/common/sys/dktp/dadk.h b/usr/src/uts/common/sys/dktp/dadk.h index f5c990e7c0..2178ad1f0d 100644 --- a/usr/src/uts/common/sys/dktp/dadk.h +++ b/usr/src/uts/common/sys/dktp/dadk.h @@ -65,6 +65,8 @@ struct dadk { kstat_t *dad_errstats; /* error stats */ kmutex_t dad_cmd_mutex; int dad_cmd_count; + uint32_t dad_err_cnt; /* number of recent errors */ + hrtime_t dad_last_log; /* time of last error log */ }; #define DAD_SECSIZ dad_phyg.g_secsiz diff --git a/usr/src/uts/common/sys/dld.h b/usr/src/uts/common/sys/dld.h index fb2a0749d3..303a9c7e45 100644 --- a/usr/src/uts/common/sys/dld.h +++ b/usr/src/uts/common/sys/dld.h @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011 Joyent, Inc. All rights reserved. */ #ifndef _SYS_DLD_H @@ -191,6 +192,7 @@ typedef struct dld_ioc_rename { datalink_id_t dir_linkid1; datalink_id_t dir_linkid2; char dir_link[MAXLINKNAMELEN]; + boolean_t dir_zoneinit; } dld_ioc_rename_t; /* @@ -203,6 +205,7 @@ typedef struct dld_ioc_rename { typedef struct dld_ioc_zid { zoneid_t diz_zid; datalink_id_t diz_linkid; + boolean_t diz_transient; } dld_ioc_zid_t; /* diff --git a/usr/src/uts/common/sys/dls.h b/usr/src/uts/common/sys/dls.h index 6bd2bbe35a..adcfe76c08 100644 --- a/usr/src/uts/common/sys/dls.h +++ b/usr/src/uts/common/sys/dls.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011 Joyent, Inc. All rights reserved. 
*/ #ifndef _SYS_DLS_H @@ -110,7 +111,7 @@ extern void dls_devnet_close(dls_dl_handle_t); extern boolean_t dls_devnet_rebuild(); extern int dls_devnet_rename(datalink_id_t, datalink_id_t, - const char *); + const char *, boolean_t); extern int dls_devnet_create(mac_handle_t, datalink_id_t, zoneid_t); extern int dls_devnet_destroy(mac_handle_t, datalink_id_t *, @@ -127,7 +128,7 @@ extern uint16_t dls_devnet_vid(dls_dl_handle_t); extern datalink_id_t dls_devnet_linkid(dls_dl_handle_t); extern int dls_devnet_dev2linkid(dev_t, datalink_id_t *); extern int dls_devnet_phydev(datalink_id_t, dev_t *); -extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t); +extern int dls_devnet_setzid(dls_dl_handle_t, zoneid_t, boolean_t); extern zoneid_t dls_devnet_getzid(dls_dl_handle_t); extern zoneid_t dls_devnet_getownerzid(dls_dl_handle_t); extern boolean_t dls_devnet_islinkvisible(datalink_id_t, zoneid_t); diff --git a/usr/src/uts/common/sys/dls_impl.h b/usr/src/uts/common/sys/dls_impl.h index 60f51c47b5..8f7af6856c 100644 --- a/usr/src/uts/common/sys/dls_impl.h +++ b/usr/src/uts/common/sys/dls_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011 Joyent, Inc. All rights reserved. */ #ifndef _SYS_DLS_IMPL_H @@ -96,7 +97,8 @@ extern void dls_create_str_kstats(dld_str_t *); extern int dls_stat_update(kstat_t *, dls_link_t *, int); extern int dls_stat_create(const char *, int, const char *, zoneid_t, int (*)(struct kstat *, int), void *, - kstat_t **); + kstat_t **, zoneid_t); +extern void dls_stat_delete(kstat_t *); extern int dls_devnet_open_by_dev(dev_t, dls_link_t **, dls_dl_handle_t *); diff --git a/usr/src/uts/common/sys/dls_mgmt.h b/usr/src/uts/common/sys/dls_mgmt.h index b4032c24d6..4f73d92118 100644 --- a/usr/src/uts/common/sys/dls_mgmt.h +++ b/usr/src/uts/common/sys/dls_mgmt.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2011, Joyent Inc. All rights reserved. */ #ifndef _DLS_MGMT_H @@ -165,6 +166,7 @@ typedef struct dlmgmt_door_getname { typedef struct dlmgmt_door_getlinkid { int ld_cmd; char ld_link[MAXLINKNAMELEN]; + zoneid_t ld_zoneid; } dlmgmt_door_getlinkid_t; typedef struct dlmgmt_door_getnext_s { diff --git a/usr/src/uts/common/sys/dtrace.h b/usr/src/uts/common/sys/dtrace.h index fd7612f88a..e6d9e0e675 100644 --- a/usr/src/uts/common/sys/dtrace.h +++ b/usr/src/uts/common/sys/dtrace.h @@ -25,7 +25,7 @@ */ /* - * Copyright (c) 2011, Joyent, Inc. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. */ @@ -36,16 +36,16 @@ extern "C" { #endif -/* - * DTrace Dynamic Tracing Software: Kernel Interfaces - * - * Note: The contents of this file are private to the implementation of the - * Solaris system and DTrace subsystem and are subject to change at any time - * without notice. Applications and drivers using these interfaces will fail - * to run on future releases. These interfaces should not be used for any - * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). - * Please refer to the "Solaris Dynamic Tracing Guide" for more information. - */ + /* + * DTrace Dynamic Tracing Software: Kernel Interfaces + * + * Note: The contents of this file are private to the implementation of the + * Solaris system and DTrace subsystem and are subject to change at any time + * without notice. Applications and drivers using these interfaces will fail + * to run on future releases. These interfaces should not be used for any + * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). + * Please refer to the "Solaris Dynamic Tracing Guide" for more information. 
+ */ #ifndef _ASM @@ -57,9 +57,9 @@ extern "C" { #include <sys/cyclic.h> #include <sys/int_limits.h> -/* - * DTrace Universal Constants and Typedefs - */ + /* + * DTrace Universal Constants and Typedefs + */ #define DTRACE_CPUALL -1 /* all CPUs */ #define DTRACE_IDNONE 0 /* invalid probe identifier */ #define DTRACE_EPIDNONE 0 /* invalid enabled probe identifier */ @@ -75,35 +75,35 @@ extern "C" { #define DTRACE_FUNCNAMELEN 128 #define DTRACE_NAMELEN 64 #define DTRACE_FULLNAMELEN (DTRACE_PROVNAMELEN + DTRACE_MODNAMELEN + \ - DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 4) + DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 4) #define DTRACE_ARGTYPELEN 128 -typedef uint32_t dtrace_id_t; /* probe identifier */ -typedef uint32_t dtrace_epid_t; /* enabled probe identifier */ -typedef uint32_t dtrace_aggid_t; /* aggregation identifier */ -typedef int64_t dtrace_aggvarid_t; /* aggregation variable identifier */ -typedef uint16_t dtrace_actkind_t; /* action kind */ -typedef int64_t dtrace_optval_t; /* option value */ -typedef uint32_t dtrace_cacheid_t; /* predicate cache identifier */ - -typedef enum dtrace_probespec { - DTRACE_PROBESPEC_NONE = -1, - DTRACE_PROBESPEC_PROVIDER = 0, - DTRACE_PROBESPEC_MOD, - DTRACE_PROBESPEC_FUNC, - DTRACE_PROBESPEC_NAME -} dtrace_probespec_t; - -/* - * DTrace Intermediate Format (DIF) - * - * The following definitions describe the DTrace Intermediate Format (DIF), a - * a RISC-like instruction set and program encoding used to represent - * predicates and actions that can be bound to DTrace probes. The constants - * below defining the number of available registers are suggested minimums; the - * compiler should use DTRACEIOC_CONF to dynamically obtain the number of - * registers provided by the current DTrace implementation. 
- */ + typedef uint32_t dtrace_id_t; /* probe identifier */ + typedef uint32_t dtrace_epid_t; /* enabled probe identifier */ + typedef uint32_t dtrace_aggid_t; /* aggregation identifier */ + typedef int64_t dtrace_aggvarid_t; /* aggregation variable identifier */ + typedef uint16_t dtrace_actkind_t; /* action kind */ + typedef int64_t dtrace_optval_t; /* option value */ + typedef uint32_t dtrace_cacheid_t; /* predicate cache identifier */ + + typedef enum dtrace_probespec { + DTRACE_PROBESPEC_NONE = -1, + DTRACE_PROBESPEC_PROVIDER = 0, + DTRACE_PROBESPEC_MOD, + DTRACE_PROBESPEC_FUNC, + DTRACE_PROBESPEC_NAME + } dtrace_probespec_t; + + /* + * DTrace Intermediate Format (DIF) + * + * The following definitions describe the DTrace Intermediate Format (DIF), a + * a RISC-like instruction set and program encoding used to represent + * predicates and actions that can be bound to DTrace probes. The constants + * below defining the number of available registers are suggested minimums; the + * compiler should use DTRACEIOC_CONF to dynamically obtain the number of + * registers provided by the current DTrace implementation. 
+ */ #define DIF_VERSION_1 1 /* DIF version 1: Solaris 10 Beta */ #define DIF_VERSION_2 2 /* DIF version 2: Solaris 10 FCS */ #define DIF_VERSION DIF_VERSION_2 /* latest DIF instruction set version */ @@ -288,10 +288,11 @@ typedef enum dtrace_probespec { #define DIF_SUBR_INET_NTOA6 43 #define DIF_SUBR_TOUPPER 44 #define DIF_SUBR_TOLOWER 45 +#define DIF_SUBR_GETF 46 -#define DIF_SUBR_MAX 45 /* max subroutine value */ +#define DIF_SUBR_MAX 46 /* max subroutine value */ -typedef uint32_t dif_instr_t; + typedef uint32_t dif_instr_t; #define DIF_INSTR_OP(i) (((i) >> 24) & 0xff) #define DIF_INSTR_R1(i) (((i) >> 16) & 0xff) @@ -333,39 +334,39 @@ typedef uint32_t dif_instr_t; #define DIF_REG_R0 0 /* %r0 is always set to zero */ -/* - * A DTrace Intermediate Format Type (DIF Type) is used to represent the types - * of variables, function and associative array arguments, and the return type - * for each DIF object (shown below). It contains a description of the type, - * its size in bytes, and a module identifier. - */ -typedef struct dtrace_diftype { - uint8_t dtdt_kind; /* type kind (see below) */ - uint8_t dtdt_ckind; /* type kind in CTF */ - uint8_t dtdt_flags; /* type flags (see below) */ - uint8_t dtdt_pad; /* reserved for future use */ - uint32_t dtdt_size; /* type size in bytes (unless string) */ -} dtrace_diftype_t; + /* + * A DTrace Intermediate Format Type (DIF Type) is used to represent the types + * of variables, function and associative array arguments, and the return type + * for each DIF object (shown below). It contains a description of the type, + * its size in bytes, and a module identifier. 
+ */ + typedef struct dtrace_diftype { + uint8_t dtdt_kind; /* type kind (see below) */ + uint8_t dtdt_ckind; /* type kind in CTF */ + uint8_t dtdt_flags; /* type flags (see below) */ + uint8_t dtdt_pad; /* reserved for future use */ + uint32_t dtdt_size; /* type size in bytes (unless string) */ + } dtrace_diftype_t; #define DIF_TYPE_CTF 0 /* type is a CTF type */ #define DIF_TYPE_STRING 1 /* type is a D string */ #define DIF_TF_BYREF 0x1 /* type is passed by reference */ -/* - * A DTrace Intermediate Format variable record is used to describe each of the - * variables referenced by a given DIF object. It contains an integer variable - * identifier along with variable scope and properties, as shown below. The - * size of this structure must be sizeof (int) aligned. - */ -typedef struct dtrace_difv { - uint32_t dtdv_name; /* variable name index in dtdo_strtab */ - uint32_t dtdv_id; /* variable reference identifier */ - uint8_t dtdv_kind; /* variable kind (see below) */ - uint8_t dtdv_scope; /* variable scope (see below) */ - uint16_t dtdv_flags; /* variable flags (see below) */ - dtrace_diftype_t dtdv_type; /* variable type (see above) */ -} dtrace_difv_t; + /* + * A DTrace Intermediate Format variable record is used to describe each of the + * variables referenced by a given DIF object. It contains an integer variable + * identifier along with variable scope and properties, as shown below. The + * size of this structure must be sizeof (int) aligned. 
+ */ + typedef struct dtrace_difv { + uint32_t dtdv_name; /* variable name index in dtdo_strtab */ + uint32_t dtdv_id; /* variable reference identifier */ + uint8_t dtdv_kind; /* variable kind (see below) */ + uint8_t dtdv_scope; /* variable scope (see below) */ + uint16_t dtdv_flags; /* variable flags (see below) */ + dtrace_diftype_t dtdv_type; /* variable type (see above) */ + } dtrace_difv_t; #define DIFV_KIND_ARRAY 0 /* variable is an array of quantities */ #define DIFV_KIND_SCALAR 1 /* variable is a scalar quantity */ @@ -377,21 +378,21 @@ typedef struct dtrace_difv { #define DIFV_F_REF 0x1 /* variable is referenced by DIFO */ #define DIFV_F_MOD 0x2 /* variable is written by DIFO */ -/* - * DTrace Actions - * - * The upper byte determines the class of the action; the low bytes determines - * the specific action within that class. The classes of actions are as - * follows: - * - * [ no class ] <= May record process- or kernel-related data - * DTRACEACT_PROC <= Only records process-related data - * DTRACEACT_PROC_DESTRUCTIVE <= Potentially destructive to processes - * DTRACEACT_KERNEL <= Only records kernel-related data - * DTRACEACT_KERNEL_DESTRUCTIVE <= Potentially destructive to the kernel - * DTRACEACT_SPECULATIVE <= Speculation-related action - * DTRACEACT_AGGREGATION <= Aggregating action - */ + /* + * DTrace Actions + * + * The upper byte determines the class of the action; the low bytes determines + * the specific action within that class. 
The classes of actions are as + * follows: + * + * [ no class ] <= May record process- or kernel-related data + * DTRACEACT_PROC <= Only records process-related data + * DTRACEACT_PROC_DESTRUCTIVE <= Potentially destructive to processes + * DTRACEACT_KERNEL <= Only records kernel-related data + * DTRACEACT_KERNEL_DESTRUCTIVE <= Potentially destructive to the kernel + * DTRACEACT_SPECULATIVE <= Speculation-related action + * DTRACEACT_AGGREGATION <= Aggregating action + */ #define DTRACEACT_NONE 0 /* no action */ #define DTRACEACT_DIFEXPR 1 /* action is DIF expression */ #define DTRACEACT_EXIT 2 /* exit() action */ @@ -435,27 +436,27 @@ typedef struct dtrace_difv { #define DTRACEACT_ISDESTRUCTIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_PROC_DESTRUCTIVE || \ - DTRACEACT_CLASS(x) == DTRACEACT_KERNEL_DESTRUCTIVE) + DTRACEACT_CLASS(x) == DTRACEACT_KERNEL_DESTRUCTIVE) #define DTRACEACT_ISSPECULATIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_SPECULATIVE) #define DTRACEACT_ISPRINTFLIKE(x) \ ((x) == DTRACEACT_PRINTF || (x) == DTRACEACT_PRINTA || \ - (x) == DTRACEACT_SYSTEM || (x) == DTRACEACT_FREOPEN) - -/* - * DTrace Aggregating Actions - * - * These are functions f(x) for which the following is true: - * - * f(f(x_0) U f(x_1) U ... U f(x_n)) = f(x_0 U x_1 U ... U x_n) - * - * where x_n is a set of arbitrary data. Aggregating actions are in their own - * DTrace action class, DTTRACEACT_AGGREGATION. The macros provided here allow - * for easier processing of the aggregation argument and data payload for a few - * aggregating actions (notably: quantize(), lquantize(), and ustack()). - */ + (x) == DTRACEACT_SYSTEM || (x) == DTRACEACT_FREOPEN) + + /* + * DTrace Aggregating Actions + * + * These are functions f(x) for which the following is true: + * + * f(f(x_0) U f(x_1) U ... U f(x_n)) = f(x_0 U x_1 U ... U x_n) + * + * where x_n is a set of arbitrary data. Aggregating actions are in their own + * DTrace action class, DTTRACEACT_AGGREGATION. 
The macros provided here allow + * for easier processing of the aggregation argument and data payload for a few + * aggregating actions (notably: quantize(), lquantize(), and ustack()). + */ #define DTRACEACT_AGGREGATION 0x0700 #define DTRACEAGG_COUNT (DTRACEACT_AGGREGATION + 1) #define DTRACEAGG_MIN (DTRACEACT_AGGREGATION + 2) @@ -477,9 +478,9 @@ typedef struct dtrace_difv { #define DTRACE_QUANTIZE_BUCKETVAL(buck) \ (int64_t)((buck) < DTRACE_QUANTIZE_ZEROBUCKET ? \ - -(1LL << (DTRACE_QUANTIZE_ZEROBUCKET - 1 - (buck))) : \ - (buck) == DTRACE_QUANTIZE_ZEROBUCKET ? 0 : \ - 1LL << ((buck) - DTRACE_QUANTIZE_ZEROBUCKET - 1)) + -(1LL << (DTRACE_QUANTIZE_ZEROBUCKET - 1 - (buck))) : \ + (buck) == DTRACE_QUANTIZE_ZEROBUCKET ? 0 : \ + 1LL << ((buck) - DTRACE_QUANTIZE_ZEROBUCKET - 1)) #define DTRACE_LQUANTIZE_STEPSHIFT 48 #define DTRACE_LQUANTIZE_STEPMASK ((uint64_t)UINT16_MAX << 48) @@ -490,15 +491,15 @@ typedef struct dtrace_difv { #define DTRACE_LQUANTIZE_STEP(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_STEPMASK) >> \ - DTRACE_LQUANTIZE_STEPSHIFT) + DTRACE_LQUANTIZE_STEPSHIFT) #define DTRACE_LQUANTIZE_LEVELS(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_LEVELMASK) >> \ - DTRACE_LQUANTIZE_LEVELSHIFT) + DTRACE_LQUANTIZE_LEVELSHIFT) #define DTRACE_LQUANTIZE_BASE(x) \ (int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \ - DTRACE_LQUANTIZE_BASESHIFT) + DTRACE_LQUANTIZE_BASESHIFT) #define DTRACE_LLQUANTIZE_FACTORSHIFT 48 #define DTRACE_LLQUANTIZE_FACTORMASK ((uint64_t)UINT16_MAX << 48) @@ -511,19 +512,19 @@ typedef struct dtrace_difv { #define DTRACE_LLQUANTIZE_FACTOR(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \ - DTRACE_LLQUANTIZE_FACTORSHIFT) + DTRACE_LLQUANTIZE_FACTORSHIFT) #define DTRACE_LLQUANTIZE_LOW(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_LOWMASK) >> \ - DTRACE_LLQUANTIZE_LOWSHIFT) + DTRACE_LLQUANTIZE_LOWSHIFT) #define DTRACE_LLQUANTIZE_HIGH(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \ - DTRACE_LLQUANTIZE_HIGHSHIFT) + DTRACE_LLQUANTIZE_HIGHSHIFT) #define 
DTRACE_LLQUANTIZE_NSTEP(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \ - DTRACE_LLQUANTIZE_NSTEPSHIFT) + DTRACE_LLQUANTIZE_NSTEPSHIFT) #define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX) #define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32) @@ -540,72 +541,72 @@ typedef struct dtrace_difv { #define DTRACE_PTR(type, name) type *name #endif -/* - * DTrace Object Format (DOF) - * - * DTrace programs can be persistently encoded in the DOF format so that they - * may be embedded in other programs (for example, in an ELF file) or in the - * dtrace driver configuration file for use in anonymous tracing. The DOF - * format is versioned and extensible so that it can be revised and so that - * internal data structures can be modified or extended compatibly. All DOF - * structures use fixed-size types, so the 32-bit and 64-bit representations - * are identical and consumers can use either data model transparently. - * - * The file layout is structured as follows: - * - * +---------------+-------------------+----- ... ----+---- ... ------+ - * | dof_hdr_t | dof_sec_t[ ... ] | loadable | non-loadable | - * | (file header) | (section headers) | section data | section data | - * +---------------+-------------------+----- ... ----+---- ... ------+ - * |<------------ dof_hdr.dofh_loadsz --------------->| | - * |<------------ dof_hdr.dofh_filesz ------------------------------->| - * - * The file header stores meta-data including a magic number, data model for - * the instrumentation, data encoding, and properties of the DIF code within. - * The header describes its own size and the size of the section headers. By - * convention, an array of section headers follows the file header, and then - * the data for all loadable sections and unloadable sections. This permits - * consumer code to easily download the headers and all loadable data into the - * DTrace driver in one contiguous chunk, omitting other extraneous sections. 
- * - * The section headers describe the size, offset, alignment, and section type - * for each section. Sections are described using a set of #defines that tell - * the consumer what kind of data is expected. Sections can contain links to - * other sections by storing a dof_secidx_t, an index into the section header - * array, inside of the section data structures. The section header includes - * an entry size so that sections with data arrays can grow their structures. - * - * The DOF data itself can contain many snippets of DIF (i.e. >1 DIFOs), which - * are represented themselves as a collection of related DOF sections. This - * permits us to change the set of sections associated with a DIFO over time, - * and also permits us to encode DIFOs that contain different sets of sections. - * When a DOF section wants to refer to a DIFO, it stores the dof_secidx_t of a - * section of type DOF_SECT_DIFOHDR. This section's data is then an array of - * dof_secidx_t's which in turn denote the sections associated with this DIFO. - * - * This loose coupling of the file structure (header and sections) to the - * structure of the DTrace program itself (ECB descriptions, action - * descriptions, and DIFOs) permits activities such as relocation processing - * to occur in a single pass without having to understand D program structure. - * - * Finally, strings are always stored in ELF-style string tables along with a - * string table section index and string table offset. Therefore strings in - * DOF are always arbitrary-length and not bound to the current implementation. - */ + /* + * DTrace Object Format (DOF) + * + * DTrace programs can be persistently encoded in the DOF format so that they + * may be embedded in other programs (for example, in an ELF file) or in the + * dtrace driver configuration file for use in anonymous tracing. 
The DOF + * format is versioned and extensible so that it can be revised and so that + * internal data structures can be modified or extended compatibly. All DOF + * structures use fixed-size types, so the 32-bit and 64-bit representations + * are identical and consumers can use either data model transparently. + * + * The file layout is structured as follows: + * + * +---------------+-------------------+----- ... ----+---- ... ------+ + * | dof_hdr_t | dof_sec_t[ ... ] | loadable | non-loadable | + * | (file header) | (section headers) | section data | section data | + * +---------------+-------------------+----- ... ----+---- ... ------+ + * |<------------ dof_hdr.dofh_loadsz --------------->| | + * |<------------ dof_hdr.dofh_filesz ------------------------------->| + * + * The file header stores meta-data including a magic number, data model for + * the instrumentation, data encoding, and properties of the DIF code within. + * The header describes its own size and the size of the section headers. By + * convention, an array of section headers follows the file header, and then + * the data for all loadable sections and unloadable sections. This permits + * consumer code to easily download the headers and all loadable data into the + * DTrace driver in one contiguous chunk, omitting other extraneous sections. + * + * The section headers describe the size, offset, alignment, and section type + * for each section. Sections are described using a set of #defines that tell + * the consumer what kind of data is expected. Sections can contain links to + * other sections by storing a dof_secidx_t, an index into the section header + * array, inside of the section data structures. The section header includes + * an entry size so that sections with data arrays can grow their structures. + * + * The DOF data itself can contain many snippets of DIF (i.e. >1 DIFOs), which + * are represented themselves as a collection of related DOF sections. 
This + * permits us to change the set of sections associated with a DIFO over time, + * and also permits us to encode DIFOs that contain different sets of sections. + * When a DOF section wants to refer to a DIFO, it stores the dof_secidx_t of a + * section of type DOF_SECT_DIFOHDR. This section's data is then an array of + * dof_secidx_t's which in turn denote the sections associated with this DIFO. + * + * This loose coupling of the file structure (header and sections) to the + * structure of the DTrace program itself (ECB descriptions, action + * descriptions, and DIFOs) permits activities such as relocation processing + * to occur in a single pass without having to understand D program structure. + * + * Finally, strings are always stored in ELF-style string tables along with a + * string table section index and string table offset. Therefore strings in + * DOF are always arbitrary-length and not bound to the current implementation. + */ #define DOF_ID_SIZE 16 /* total size of dofh_ident[] in bytes */ -typedef struct dof_hdr { - uint8_t dofh_ident[DOF_ID_SIZE]; /* identification bytes (see below) */ - uint32_t dofh_flags; /* file attribute flags (if any) */ - uint32_t dofh_hdrsize; /* size of file header in bytes */ - uint32_t dofh_secsize; /* size of section header in bytes */ - uint32_t dofh_secnum; /* number of section headers */ - uint64_t dofh_secoff; /* file offset of section headers */ - uint64_t dofh_loadsz; /* file size of loadable portion */ - uint64_t dofh_filesz; /* file size of entire DOF file */ - uint64_t dofh_pad; /* reserved for future use */ -} dof_hdr_t; + typedef struct dof_hdr { + uint8_t dofh_ident[DOF_ID_SIZE]; /* identification bytes (see below) */ + uint32_t dofh_flags; /* file attribute flags (if any) */ + uint32_t dofh_hdrsize; /* size of file header in bytes */ + uint32_t dofh_secsize; /* size of section header in bytes */ + uint32_t dofh_secnum; /* number of section headers */ + uint64_t dofh_secoff; /* file offset of section 
headers */ + uint64_t dofh_loadsz; /* file size of loadable portion */ + uint64_t dofh_filesz; /* file size of entire DOF file */ + uint64_t dofh_pad; /* reserved for future use */ + } dof_hdr_t; #define DOF_ID_MAG0 0 /* first byte of magic number */ #define DOF_ID_MAG1 1 /* second byte of magic number */ @@ -653,20 +654,20 @@ typedef struct dof_hdr { #define DOF_FL_VALID 0 /* mask of all valid dofh_flags bits */ -typedef uint32_t dof_secidx_t; /* section header table index type */ -typedef uint32_t dof_stridx_t; /* string table index type */ + typedef uint32_t dof_secidx_t; /* section header table index type */ + typedef uint32_t dof_stridx_t; /* string table index type */ #define DOF_SECIDX_NONE (-1U) /* null value for section indices */ #define DOF_STRIDX_NONE (-1U) /* null value for string indices */ -typedef struct dof_sec { - uint32_t dofs_type; /* section type (see below) */ - uint32_t dofs_align; /* section data memory alignment */ - uint32_t dofs_flags; /* section flags (if any) */ - uint32_t dofs_entsize; /* size of section entry (if table) */ - uint64_t dofs_offset; /* offset of section data within file */ - uint64_t dofs_size; /* size of section data in bytes */ -} dof_sec_t; + typedef struct dof_sec { + uint32_t dofs_type; /* section type (see below) */ + uint32_t dofs_align; /* section data memory alignment */ + uint32_t dofs_flags; /* section flags (if any) */ + uint32_t dofs_entsize; /* size of section entry (if table) */ + uint64_t dofs_offset; /* offset of section data within file */ + uint64_t dofs_size; /* size of section data in bytes */ + } dof_sec_t; #define DOF_SECT_NONE 0 /* null section */ #define DOF_SECT_COMMENTS 1 /* compiler comments */ @@ -700,297 +701,297 @@ typedef struct dof_sec { #define DOF_SEC_ISLOADABLE(x) \ (((x) == DOF_SECT_ECBDESC) || ((x) == DOF_SECT_PROBEDESC) || \ - ((x) == DOF_SECT_ACTDESC) || ((x) == DOF_SECT_DIFOHDR) || \ - ((x) == DOF_SECT_DIF) || ((x) == DOF_SECT_STRTAB) || \ - ((x) == DOF_SECT_VARTAB) || ((x) == 
DOF_SECT_RELTAB) || \ - ((x) == DOF_SECT_TYPTAB) || ((x) == DOF_SECT_URELHDR) || \ - ((x) == DOF_SECT_KRELHDR) || ((x) == DOF_SECT_OPTDESC) || \ - ((x) == DOF_SECT_PROVIDER) || ((x) == DOF_SECT_PROBES) || \ - ((x) == DOF_SECT_PRARGS) || ((x) == DOF_SECT_PROFFS) || \ - ((x) == DOF_SECT_INTTAB) || ((x) == DOF_SECT_XLTAB) || \ - ((x) == DOF_SECT_XLMEMBERS) || ((x) == DOF_SECT_XLIMPORT) || \ - ((x) == DOF_SECT_XLIMPORT) || ((x) == DOF_SECT_XLEXPORT) || \ - ((x) == DOF_SECT_PREXPORT) || ((x) == DOF_SECT_PRENOFFS)) - -typedef struct dof_ecbdesc { - dof_secidx_t dofe_probes; /* link to DOF_SECT_PROBEDESC */ - dof_secidx_t dofe_pred; /* link to DOF_SECT_DIFOHDR */ - dof_secidx_t dofe_actions; /* link to DOF_SECT_ACTDESC */ - uint32_t dofe_pad; /* reserved for future use */ - uint64_t dofe_uarg; /* user-supplied library argument */ -} dof_ecbdesc_t; - -typedef struct dof_probedesc { - dof_secidx_t dofp_strtab; /* link to DOF_SECT_STRTAB section */ - dof_stridx_t dofp_provider; /* provider string */ - dof_stridx_t dofp_mod; /* module string */ - dof_stridx_t dofp_func; /* function string */ - dof_stridx_t dofp_name; /* name string */ - uint32_t dofp_id; /* probe identifier (or zero) */ -} dof_probedesc_t; - -typedef struct dof_actdesc { - dof_secidx_t dofa_difo; /* link to DOF_SECT_DIFOHDR */ - dof_secidx_t dofa_strtab; /* link to DOF_SECT_STRTAB section */ - uint32_t dofa_kind; /* action kind (DTRACEACT_* constant) */ - uint32_t dofa_ntuple; /* number of subsequent tuple actions */ - uint64_t dofa_arg; /* kind-specific argument */ - uint64_t dofa_uarg; /* user-supplied argument */ -} dof_actdesc_t; - -typedef struct dof_difohdr { - dtrace_diftype_t dofd_rtype; /* return type for this fragment */ - dof_secidx_t dofd_links[1]; /* variable length array of indices */ -} dof_difohdr_t; - -typedef struct dof_relohdr { - dof_secidx_t dofr_strtab; /* link to DOF_SECT_STRTAB for names */ - dof_secidx_t dofr_relsec; /* link to DOF_SECT_RELTAB for relos */ - dof_secidx_t dofr_tgtsec; 
/* link to section we are relocating */ -} dof_relohdr_t; - -typedef struct dof_relodesc { - dof_stridx_t dofr_name; /* string name of relocation symbol */ - uint32_t dofr_type; /* relo type (DOF_RELO_* constant) */ - uint64_t dofr_offset; /* byte offset for relocation */ - uint64_t dofr_data; /* additional type-specific data */ -} dof_relodesc_t; + ((x) == DOF_SECT_ACTDESC) || ((x) == DOF_SECT_DIFOHDR) || \ + ((x) == DOF_SECT_DIF) || ((x) == DOF_SECT_STRTAB) || \ + ((x) == DOF_SECT_VARTAB) || ((x) == DOF_SECT_RELTAB) || \ + ((x) == DOF_SECT_TYPTAB) || ((x) == DOF_SECT_URELHDR) || \ + ((x) == DOF_SECT_KRELHDR) || ((x) == DOF_SECT_OPTDESC) || \ + ((x) == DOF_SECT_PROVIDER) || ((x) == DOF_SECT_PROBES) || \ + ((x) == DOF_SECT_PRARGS) || ((x) == DOF_SECT_PROFFS) || \ + ((x) == DOF_SECT_INTTAB) || ((x) == DOF_SECT_XLTAB) || \ + ((x) == DOF_SECT_XLMEMBERS) || ((x) == DOF_SECT_XLIMPORT) || \ + ((x) == DOF_SECT_XLIMPORT) || ((x) == DOF_SECT_XLEXPORT) || \ + ((x) == DOF_SECT_PREXPORT) || ((x) == DOF_SECT_PRENOFFS)) + + typedef struct dof_ecbdesc { + dof_secidx_t dofe_probes; /* link to DOF_SECT_PROBEDESC */ + dof_secidx_t dofe_pred; /* link to DOF_SECT_DIFOHDR */ + dof_secidx_t dofe_actions; /* link to DOF_SECT_ACTDESC */ + uint32_t dofe_pad; /* reserved for future use */ + uint64_t dofe_uarg; /* user-supplied library argument */ + } dof_ecbdesc_t; + + typedef struct dof_probedesc { + dof_secidx_t dofp_strtab; /* link to DOF_SECT_STRTAB section */ + dof_stridx_t dofp_provider; /* provider string */ + dof_stridx_t dofp_mod; /* module string */ + dof_stridx_t dofp_func; /* function string */ + dof_stridx_t dofp_name; /* name string */ + uint32_t dofp_id; /* probe identifier (or zero) */ + } dof_probedesc_t; + + typedef struct dof_actdesc { + dof_secidx_t dofa_difo; /* link to DOF_SECT_DIFOHDR */ + dof_secidx_t dofa_strtab; /* link to DOF_SECT_STRTAB section */ + uint32_t dofa_kind; /* action kind (DTRACEACT_* constant) */ + uint32_t dofa_ntuple; /* number of subsequent tuple 
actions */ + uint64_t dofa_arg; /* kind-specific argument */ + uint64_t dofa_uarg; /* user-supplied argument */ + } dof_actdesc_t; + + typedef struct dof_difohdr { + dtrace_diftype_t dofd_rtype; /* return type for this fragment */ + dof_secidx_t dofd_links[1]; /* variable length array of indices */ + } dof_difohdr_t; + + typedef struct dof_relohdr { + dof_secidx_t dofr_strtab; /* link to DOF_SECT_STRTAB for names */ + dof_secidx_t dofr_relsec; /* link to DOF_SECT_RELTAB for relos */ + dof_secidx_t dofr_tgtsec; /* link to section we are relocating */ + } dof_relohdr_t; + + typedef struct dof_relodesc { + dof_stridx_t dofr_name; /* string name of relocation symbol */ + uint32_t dofr_type; /* relo type (DOF_RELO_* constant) */ + uint64_t dofr_offset; /* byte offset for relocation */ + uint64_t dofr_data; /* additional type-specific data */ + } dof_relodesc_t; #define DOF_RELO_NONE 0 /* empty relocation entry */ #define DOF_RELO_SETX 1 /* relocate setx value */ -typedef struct dof_optdesc { - uint32_t dofo_option; /* option identifier */ - dof_secidx_t dofo_strtab; /* string table, if string option */ - uint64_t dofo_value; /* option value or string index */ -} dof_optdesc_t; + typedef struct dof_optdesc { + uint32_t dofo_option; /* option identifier */ + dof_secidx_t dofo_strtab; /* string table, if string option */ + uint64_t dofo_value; /* option value or string index */ + } dof_optdesc_t; -typedef uint32_t dof_attr_t; /* encoded stability attributes */ + typedef uint32_t dof_attr_t; /* encoded stability attributes */ #define DOF_ATTR(n, d, c) (((n) << 24) | ((d) << 16) | ((c) << 8)) #define DOF_ATTR_NAME(a) (((a) >> 24) & 0xff) #define DOF_ATTR_DATA(a) (((a) >> 16) & 0xff) #define DOF_ATTR_CLASS(a) (((a) >> 8) & 0xff) -typedef struct dof_provider { - dof_secidx_t dofpv_strtab; /* link to DOF_SECT_STRTAB section */ - dof_secidx_t dofpv_probes; /* link to DOF_SECT_PROBES section */ - dof_secidx_t dofpv_prargs; /* link to DOF_SECT_PRARGS section */ - dof_secidx_t 
dofpv_proffs; /* link to DOF_SECT_PROFFS section */ - dof_stridx_t dofpv_name; /* provider name string */ - dof_attr_t dofpv_provattr; /* provider attributes */ - dof_attr_t dofpv_modattr; /* module attributes */ - dof_attr_t dofpv_funcattr; /* function attributes */ - dof_attr_t dofpv_nameattr; /* name attributes */ - dof_attr_t dofpv_argsattr; /* args attributes */ - dof_secidx_t dofpv_prenoffs; /* link to DOF_SECT_PRENOFFS section */ -} dof_provider_t; - -typedef struct dof_probe { - uint64_t dofpr_addr; /* probe base address or offset */ - dof_stridx_t dofpr_func; /* probe function string */ - dof_stridx_t dofpr_name; /* probe name string */ - dof_stridx_t dofpr_nargv; /* native argument type strings */ - dof_stridx_t dofpr_xargv; /* translated argument type strings */ - uint32_t dofpr_argidx; /* index of first argument mapping */ - uint32_t dofpr_offidx; /* index of first offset entry */ - uint8_t dofpr_nargc; /* native argument count */ - uint8_t dofpr_xargc; /* translated argument count */ - uint16_t dofpr_noffs; /* number of offset entries for probe */ - uint32_t dofpr_enoffidx; /* index of first is-enabled offset */ - uint16_t dofpr_nenoffs; /* number of is-enabled offsets */ - uint16_t dofpr_pad1; /* reserved for future use */ - uint32_t dofpr_pad2; /* reserved for future use */ -} dof_probe_t; - -typedef struct dof_xlator { - dof_secidx_t dofxl_members; /* link to DOF_SECT_XLMEMBERS section */ - dof_secidx_t dofxl_strtab; /* link to DOF_SECT_STRTAB section */ - dof_stridx_t dofxl_argv; /* input parameter type strings */ - uint32_t dofxl_argc; /* input parameter list length */ - dof_stridx_t dofxl_type; /* output type string name */ - dof_attr_t dofxl_attr; /* output stability attributes */ -} dof_xlator_t; - -typedef struct dof_xlmember { - dof_secidx_t dofxm_difo; /* member link to DOF_SECT_DIFOHDR */ - dof_stridx_t dofxm_name; /* member name */ - dtrace_diftype_t dofxm_type; /* member type */ -} dof_xlmember_t; - -typedef struct dof_xlref { - 
dof_secidx_t dofxr_xlator; /* link to DOF_SECT_XLATORS section */ - uint32_t dofxr_member; /* index of referenced dof_xlmember */ - uint32_t dofxr_argn; /* index of argument for DIF_OP_XLARG */ -} dof_xlref_t; - -/* - * DTrace Intermediate Format Object (DIFO) - * - * A DIFO is used to store the compiled DIF for a D expression, its return - * type, and its string and variable tables. The string table is a single - * buffer of character data into which sets instructions and variable - * references can reference strings using a byte offset. The variable table - * is an array of dtrace_difv_t structures that describe the name and type of - * each variable and the id used in the DIF code. This structure is described - * above in the DIF section of this header file. The DIFO is used at both - * user-level (in the library) and in the kernel, but the structure is never - * passed between the two: the DOF structures form the only interface. As a - * result, the definition can change depending on the presence of _KERNEL. 
- */ -typedef struct dtrace_difo { - dif_instr_t *dtdo_buf; /* instruction buffer */ - uint64_t *dtdo_inttab; /* integer table (optional) */ - char *dtdo_strtab; /* string table (optional) */ - dtrace_difv_t *dtdo_vartab; /* variable table (optional) */ - uint_t dtdo_len; /* length of instruction buffer */ - uint_t dtdo_intlen; /* length of integer table */ - uint_t dtdo_strlen; /* length of string table */ - uint_t dtdo_varlen; /* length of variable table */ - dtrace_diftype_t dtdo_rtype; /* return type */ - uint_t dtdo_refcnt; /* owner reference count */ - uint_t dtdo_destructive; /* invokes destructive subroutines */ + typedef struct dof_provider { + dof_secidx_t dofpv_strtab; /* link to DOF_SECT_STRTAB section */ + dof_secidx_t dofpv_probes; /* link to DOF_SECT_PROBES section */ + dof_secidx_t dofpv_prargs; /* link to DOF_SECT_PRARGS section */ + dof_secidx_t dofpv_proffs; /* link to DOF_SECT_PROFFS section */ + dof_stridx_t dofpv_name; /* provider name string */ + dof_attr_t dofpv_provattr; /* provider attributes */ + dof_attr_t dofpv_modattr; /* module attributes */ + dof_attr_t dofpv_funcattr; /* function attributes */ + dof_attr_t dofpv_nameattr; /* name attributes */ + dof_attr_t dofpv_argsattr; /* args attributes */ + dof_secidx_t dofpv_prenoffs; /* link to DOF_SECT_PRENOFFS section */ + } dof_provider_t; + + typedef struct dof_probe { + uint64_t dofpr_addr; /* probe base address or offset */ + dof_stridx_t dofpr_func; /* probe function string */ + dof_stridx_t dofpr_name; /* probe name string */ + dof_stridx_t dofpr_nargv; /* native argument type strings */ + dof_stridx_t dofpr_xargv; /* translated argument type strings */ + uint32_t dofpr_argidx; /* index of first argument mapping */ + uint32_t dofpr_offidx; /* index of first offset entry */ + uint8_t dofpr_nargc; /* native argument count */ + uint8_t dofpr_xargc; /* translated argument count */ + uint16_t dofpr_noffs; /* number of offset entries for probe */ + uint32_t dofpr_enoffidx; /* index of first 
is-enabled offset */ + uint16_t dofpr_nenoffs; /* number of is-enabled offsets */ + uint16_t dofpr_pad1; /* reserved for future use */ + uint32_t dofpr_pad2; /* reserved for future use */ + } dof_probe_t; + + typedef struct dof_xlator { + dof_secidx_t dofxl_members; /* link to DOF_SECT_XLMEMBERS section */ + dof_secidx_t dofxl_strtab; /* link to DOF_SECT_STRTAB section */ + dof_stridx_t dofxl_argv; /* input parameter type strings */ + uint32_t dofxl_argc; /* input parameter list length */ + dof_stridx_t dofxl_type; /* output type string name */ + dof_attr_t dofxl_attr; /* output stability attributes */ + } dof_xlator_t; + + typedef struct dof_xlmember { + dof_secidx_t dofxm_difo; /* member link to DOF_SECT_DIFOHDR */ + dof_stridx_t dofxm_name; /* member name */ + dtrace_diftype_t dofxm_type; /* member type */ + } dof_xlmember_t; + + typedef struct dof_xlref { + dof_secidx_t dofxr_xlator; /* link to DOF_SECT_XLATORS section */ + uint32_t dofxr_member; /* index of referenced dof_xlmember */ + uint32_t dofxr_argn; /* index of argument for DIF_OP_XLARG */ + } dof_xlref_t; + + /* + * DTrace Intermediate Format Object (DIFO) + * + * A DIFO is used to store the compiled DIF for a D expression, its return + * type, and its string and variable tables. The string table is a single + * buffer of character data into which sets instructions and variable + * references can reference strings using a byte offset. The variable table + * is an array of dtrace_difv_t structures that describe the name and type of + * each variable and the id used in the DIF code. This structure is described + * above in the DIF section of this header file. The DIFO is used at both + * user-level (in the library) and in the kernel, but the structure is never + * passed between the two: the DOF structures form the only interface. As a + * result, the definition can change depending on the presence of _KERNEL. 
+ */ + typedef struct dtrace_difo { + dif_instr_t *dtdo_buf; /* instruction buffer */ + uint64_t *dtdo_inttab; /* integer table (optional) */ + char *dtdo_strtab; /* string table (optional) */ + dtrace_difv_t *dtdo_vartab; /* variable table (optional) */ + uint_t dtdo_len; /* length of instruction buffer */ + uint_t dtdo_intlen; /* length of integer table */ + uint_t dtdo_strlen; /* length of string table */ + uint_t dtdo_varlen; /* length of variable table */ + dtrace_diftype_t dtdo_rtype; /* return type */ + uint_t dtdo_refcnt; /* owner reference count */ + uint_t dtdo_destructive; /* invokes destructive subroutines */ #ifndef _KERNEL - dof_relodesc_t *dtdo_kreltab; /* kernel relocations */ - dof_relodesc_t *dtdo_ureltab; /* user relocations */ - struct dt_node **dtdo_xlmtab; /* translator references */ - uint_t dtdo_krelen; /* length of krelo table */ - uint_t dtdo_urelen; /* length of urelo table */ - uint_t dtdo_xlmlen; /* length of translator table */ + dof_relodesc_t *dtdo_kreltab; /* kernel relocations */ + dof_relodesc_t *dtdo_ureltab; /* user relocations */ + struct dt_node **dtdo_xlmtab; /* translator references */ + uint_t dtdo_krelen; /* length of krelo table */ + uint_t dtdo_urelen; /* length of urelo table */ + uint_t dtdo_xlmlen; /* length of translator table */ #endif -} dtrace_difo_t; - -/* - * DTrace Enabling Description Structures - * - * When DTrace is tracking the description of a DTrace enabling entity (probe, - * predicate, action, ECB, record, etc.), it does so in a description - * structure. These structures all end in "desc", and are used at both - * user-level and in the kernel -- but (with the exception of - * dtrace_probedesc_t) they are never passed between them. Typically, - * user-level will use the description structures when assembling an enabling. - * It will then distill those description structures into a DOF object (see - * above), and send it into the kernel. 
The kernel will again use the - * description structures to create a description of the enabling as it reads - * the DOF. When the description is complete, the enabling will be actually - * created -- turning it into the structures that represent the enabling - * instead of merely describing it. Not surprisingly, the description - * structures bear a strong resemblance to the DOF structures that act as their - * conduit. - */ -struct dtrace_predicate; - -typedef struct dtrace_probedesc { - dtrace_id_t dtpd_id; /* probe identifier */ - char dtpd_provider[DTRACE_PROVNAMELEN]; /* probe provider name */ - char dtpd_mod[DTRACE_MODNAMELEN]; /* probe module name */ - char dtpd_func[DTRACE_FUNCNAMELEN]; /* probe function name */ - char dtpd_name[DTRACE_NAMELEN]; /* probe name */ -} dtrace_probedesc_t; - -typedef struct dtrace_repldesc { - dtrace_probedesc_t dtrpd_match; /* probe descr. to match */ - dtrace_probedesc_t dtrpd_create; /* probe descr. to create */ -} dtrace_repldesc_t; - -typedef struct dtrace_preddesc { - dtrace_difo_t *dtpdd_difo; /* pointer to DIF object */ - struct dtrace_predicate *dtpdd_predicate; /* pointer to predicate */ -} dtrace_preddesc_t; - -typedef struct dtrace_actdesc { - dtrace_difo_t *dtad_difo; /* pointer to DIF object */ - struct dtrace_actdesc *dtad_next; /* next action */ - dtrace_actkind_t dtad_kind; /* kind of action */ - uint32_t dtad_ntuple; /* number in tuple */ - uint64_t dtad_arg; /* action argument */ - uint64_t dtad_uarg; /* user argument */ - int dtad_refcnt; /* reference count */ -} dtrace_actdesc_t; - -typedef struct dtrace_ecbdesc { - dtrace_actdesc_t *dted_action; /* action description(s) */ - dtrace_preddesc_t dted_pred; /* predicate description */ - dtrace_probedesc_t dted_probe; /* probe description */ - uint64_t dted_uarg; /* library argument */ - int dted_refcnt; /* reference count */ -} dtrace_ecbdesc_t; - -/* - * DTrace Metadata Description Structures - * - * DTrace separates the trace data stream from the metadata 
stream. The only - * metadata tokens placed in the data stream are the dtrace_rechdr_t (EPID + - * timestamp) or (in the case of aggregations) aggregation identifiers. To - * determine the structure of the data, DTrace consumers pass the token to the - * kernel, and receive in return a corresponding description of the enabled - * probe (via the dtrace_eprobedesc structure) or the aggregation (via the - * dtrace_aggdesc structure). Both of these structures are expressed in terms - * of record descriptions (via the dtrace_recdesc structure) that describe the - * exact structure of the data. Some record descriptions may also contain a - * format identifier; this additional bit of metadata can be retrieved from the - * kernel, for which a format description is returned via the dtrace_fmtdesc - * structure. Note that all four of these structures must be bitness-neutral - * to allow for a 32-bit DTrace consumer on a 64-bit kernel. - */ -typedef struct dtrace_recdesc { - dtrace_actkind_t dtrd_action; /* kind of action */ - uint32_t dtrd_size; /* size of record */ - uint32_t dtrd_offset; /* offset in ECB's data */ - uint16_t dtrd_alignment; /* required alignment */ - uint16_t dtrd_format; /* format, if any */ - uint64_t dtrd_arg; /* action argument */ - uint64_t dtrd_uarg; /* user argument */ -} dtrace_recdesc_t; - -typedef struct dtrace_eprobedesc { - dtrace_epid_t dtepd_epid; /* enabled probe ID */ - dtrace_id_t dtepd_probeid; /* probe ID */ - uint64_t dtepd_uarg; /* library argument */ - uint32_t dtepd_size; /* total size */ - int dtepd_nrecs; /* number of records */ - dtrace_recdesc_t dtepd_rec[1]; /* records themselves */ -} dtrace_eprobedesc_t; - -typedef struct dtrace_aggdesc { - DTRACE_PTR(char, dtagd_name); /* not filled in by kernel */ - dtrace_aggvarid_t dtagd_varid; /* not filled in by kernel */ - int dtagd_flags; /* not filled in by kernel */ - dtrace_aggid_t dtagd_id; /* aggregation ID */ - dtrace_epid_t dtagd_epid; /* enabled probe ID */ - uint32_t 
dtagd_size; /* size in bytes */ - int dtagd_nrecs; /* number of records */ - uint32_t dtagd_pad; /* explicit padding */ - dtrace_recdesc_t dtagd_rec[1]; /* record descriptions */ -} dtrace_aggdesc_t; - -typedef struct dtrace_fmtdesc { - DTRACE_PTR(char, dtfd_string); /* format string */ - int dtfd_length; /* length of format string */ - uint16_t dtfd_format; /* format identifier */ -} dtrace_fmtdesc_t; + } dtrace_difo_t; + + /* + * DTrace Enabling Description Structures + * + * When DTrace is tracking the description of a DTrace enabling entity (probe, + * predicate, action, ECB, record, etc.), it does so in a description + * structure. These structures all end in "desc", and are used at both + * user-level and in the kernel -- but (with the exception of + * dtrace_probedesc_t) they are never passed between them. Typically, + * user-level will use the description structures when assembling an enabling. + * It will then distill those description structures into a DOF object (see + * above), and send it into the kernel. The kernel will again use the + * description structures to create a description of the enabling as it reads + * the DOF. When the description is complete, the enabling will be actually + * created -- turning it into the structures that represent the enabling + * instead of merely describing it. Not surprisingly, the description + * structures bear a strong resemblance to the DOF structures that act as their + * conduit. + */ + struct dtrace_predicate; + + typedef struct dtrace_probedesc { + dtrace_id_t dtpd_id; /* probe identifier */ + char dtpd_provider[DTRACE_PROVNAMELEN]; /* probe provider name */ + char dtpd_mod[DTRACE_MODNAMELEN]; /* probe module name */ + char dtpd_func[DTRACE_FUNCNAMELEN]; /* probe function name */ + char dtpd_name[DTRACE_NAMELEN]; /* probe name */ + } dtrace_probedesc_t; + + typedef struct dtrace_repldesc { + dtrace_probedesc_t dtrpd_match; /* probe descr. to match */ + dtrace_probedesc_t dtrpd_create; /* probe descr. 
to create */ + } dtrace_repldesc_t; + + typedef struct dtrace_preddesc { + dtrace_difo_t *dtpdd_difo; /* pointer to DIF object */ + struct dtrace_predicate *dtpdd_predicate; /* pointer to predicate */ + } dtrace_preddesc_t; + + typedef struct dtrace_actdesc { + dtrace_difo_t *dtad_difo; /* pointer to DIF object */ + struct dtrace_actdesc *dtad_next; /* next action */ + dtrace_actkind_t dtad_kind; /* kind of action */ + uint32_t dtad_ntuple; /* number in tuple */ + uint64_t dtad_arg; /* action argument */ + uint64_t dtad_uarg; /* user argument */ + int dtad_refcnt; /* reference count */ + } dtrace_actdesc_t; + + typedef struct dtrace_ecbdesc { + dtrace_actdesc_t *dted_action; /* action description(s) */ + dtrace_preddesc_t dted_pred; /* predicate description */ + dtrace_probedesc_t dted_probe; /* probe description */ + uint64_t dted_uarg; /* library argument */ + int dted_refcnt; /* reference count */ + } dtrace_ecbdesc_t; + + /* + * DTrace Metadata Description Structures + * + * DTrace separates the trace data stream from the metadata stream. The only + * metadata tokens placed in the data stream are the dtrace_rechdr_t (EPID + + * timestamp) or (in the case of aggregations) aggregation identifiers. To + * determine the structure of the data, DTrace consumers pass the token to the + * kernel, and receive in return a corresponding description of the enabled + * probe (via the dtrace_eprobedesc structure) or the aggregation (via the + * dtrace_aggdesc structure). Both of these structures are expressed in terms + * of record descriptions (via the dtrace_recdesc structure) that describe the + * exact structure of the data. Some record descriptions may also contain a + * format identifier; this additional bit of metadata can be retrieved from the + * kernel, for which a format description is returned via the dtrace_fmtdesc + * structure. Note that all four of these structures must be bitness-neutral + * to allow for a 32-bit DTrace consumer on a 64-bit kernel. 
+ */ + typedef struct dtrace_recdesc { + dtrace_actkind_t dtrd_action; /* kind of action */ + uint32_t dtrd_size; /* size of record */ + uint32_t dtrd_offset; /* offset in ECB's data */ + uint16_t dtrd_alignment; /* required alignment */ + uint16_t dtrd_format; /* format, if any */ + uint64_t dtrd_arg; /* action argument */ + uint64_t dtrd_uarg; /* user argument */ + } dtrace_recdesc_t; + + typedef struct dtrace_eprobedesc { + dtrace_epid_t dtepd_epid; /* enabled probe ID */ + dtrace_id_t dtepd_probeid; /* probe ID */ + uint64_t dtepd_uarg; /* library argument */ + uint32_t dtepd_size; /* total size */ + int dtepd_nrecs; /* number of records */ + dtrace_recdesc_t dtepd_rec[1]; /* records themselves */ + } dtrace_eprobedesc_t; + + typedef struct dtrace_aggdesc { + DTRACE_PTR(char, dtagd_name); /* not filled in by kernel */ + dtrace_aggvarid_t dtagd_varid; /* not filled in by kernel */ + int dtagd_flags; /* not filled in by kernel */ + dtrace_aggid_t dtagd_id; /* aggregation ID */ + dtrace_epid_t dtagd_epid; /* enabled probe ID */ + uint32_t dtagd_size; /* size in bytes */ + int dtagd_nrecs; /* number of records */ + uint32_t dtagd_pad; /* explicit padding */ + dtrace_recdesc_t dtagd_rec[1]; /* record descriptions */ + } dtrace_aggdesc_t; + + typedef struct dtrace_fmtdesc { + DTRACE_PTR(char, dtfd_string); /* format string */ + int dtfd_length; /* length of format string */ + uint16_t dtfd_format; /* format identifier */ + } dtrace_fmtdesc_t; #define DTRACE_SIZEOF_EPROBEDESC(desc) \ (sizeof (dtrace_eprobedesc_t) + ((desc)->dtepd_nrecs ? \ - (((desc)->dtepd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) + (((desc)->dtepd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) #define DTRACE_SIZEOF_AGGDESC(desc) \ (sizeof (dtrace_aggdesc_t) + ((desc)->dtagd_nrecs ? \ - (((desc)->dtagd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) - -/* - * DTrace Option Interface - * - * Run-time DTrace options are set and retrieved via DOF_SECT_OPTDESC sections - * in a DOF image. 
The dof_optdesc structure contains an option identifier and - * an option value. The valid option identifiers are found below; the mapping - * between option identifiers and option identifying strings is maintained at - * user-level. Note that the value of DTRACEOPT_UNSET is such that all of the - * following are potentially valid option values: all positive integers, zero - * and negative one. Some options (notably "bufpolicy" and "bufresize") take - * predefined tokens as their values; these are defined with - * DTRACEOPT_{option}_{token}. - */ + (((desc)->dtagd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) + + /* + * DTrace Option Interface + * + * Run-time DTrace options are set and retrieved via DOF_SECT_OPTDESC sections + * in a DOF image. The dof_optdesc structure contains an option identifier and + * an option value. The valid option identifiers are found below; the mapping + * between option identifiers and option identifying strings is maintained at + * user-level. Note that the value of DTRACEOPT_UNSET is such that all of the + * following are potentially valid option values: all positive integers, zero + * and negative one. Some options (notably "bufpolicy" and "bufresize") take + * predefined tokens as their values; these are defined with + * DTRACEOPT_{option}_{token}. + */ #define DTRACEOPT_BUFSIZE 0 /* buffer size */ #define DTRACEOPT_BUFPOLICY 1 /* buffer policy */ #define DTRACEOPT_DYNVARSIZE 2 /* dynamic variable size */ @@ -1019,7 +1020,8 @@ typedef struct dtrace_fmtdesc { #define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */ #define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. 
key position to sort on */ #define DTRACEOPT_TEMPORAL 27 /* temporally ordered output */ -#define DTRACEOPT_MAX 28 /* number of options */ +#define DTRACEOPT_ZONE 28 /* zone in which to enable probes */ +#define DTRACEOPT_MAX 29 /* number of options */ #define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */ @@ -1649,14 +1651,21 @@ typedef struct dof_helper { * * A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL * or DTRACE_MODE_USER) and the policy when the privilege of the enabling - * is insufficient for that mode (either DTRACE_MODE_NOPRIV_DROP or - * DTRACE_MODE_NOPRIV_RESTRICT). If the policy is DTRACE_MODE_NOPRIV_DROP, - * insufficient privilege will result in the probe firing being silently - * ignored for the enabling; if the policy is DTRACE_NODE_NOPRIV_RESTRICT, - * insufficient privilege will not prevent probe processing for the - * enabling, but restrictions will be in place that induce a UPRIV fault - * upon attempt to examine probe arguments or current process state. - * + * is insufficient for that mode (a combination of DTRACE_MODE_NOPRIV_DROP, + * DTRACE_MODE_NOPRIV_RESTRICT, and DTRACE_MODE_LIMITEDPRIV_RESTRICT). If + * DTRACE_MODE_NOPRIV_DROP bit is set, insufficient privilege will result + * in the probe firing being silently ignored for the enabling; if the + * DTRACE_MODE_NOPRIV_RESTRICT bit is set, insufficient privilege will not + * prevent probe processing for the enabling, but restrictions will be in + * place that induce a UPRIV fault upon attempt to examine probe arguments + * or current process state. If the DTRACE_MODE_LIMITEDPRIV_RESTRICT bit + * is set, similar restrictions will be placed upon operation if the + * privilege is sufficient to process the enabling, but does not otherwise + * entitle the enabling to all zones. 
The DTRACE_MODE_NOPRIV_DROP and + * DTRACE_MODE_NOPRIV_RESTRICT are mutually exclusive (and one of these + * two policies must be specified), but either may be combined (or not) + * with DTRACE_MODE_LIMITEDPRIV_RESTRICT. + * * 1.10.4 Caller's context * * This is called from within dtrace_probe() meaning that interrupts @@ -2054,6 +2063,7 @@ typedef struct dtrace_pops { #define DTRACE_MODE_USER 0x02 #define DTRACE_MODE_NOPRIV_DROP 0x10 #define DTRACE_MODE_NOPRIV_RESTRICT 0x20 +#define DTRACE_MODE_LIMITEDPRIV_RESTRICT 0x40 typedef uintptr_t dtrace_provider_id_t; @@ -2268,6 +2278,7 @@ extern void (*dtrace_helpers_cleanup)(); extern void (*dtrace_helpers_fork)(proc_t *parent, proc_t *child); extern void (*dtrace_cpustart_init)(); extern void (*dtrace_cpustart_fini)(); +extern void (*dtrace_closef)(); extern void (*dtrace_debugger_init)(); extern void (*dtrace_debugger_fini)(); diff --git a/usr/src/uts/common/sys/dtrace_impl.h b/usr/src/uts/common/sys/dtrace_impl.h index d780082137..f79bf1e42e 100644 --- a/usr/src/uts/common/sys/dtrace_impl.h +++ b/usr/src/uts/common/sys/dtrace_impl.h @@ -924,6 +924,7 @@ typedef struct dtrace_mstate { uintptr_t dtms_strtok; /* saved strtok() pointer */ uint32_t dtms_access; /* memory access rights */ dtrace_difo_t *dtms_difo; /* current dif object */ + file_t *dtms_getf; /* cached rval of getf() */ } dtrace_mstate_t; #define DTRACE_COND_OWNER 0x1 @@ -1144,6 +1145,7 @@ struct dtrace_state { dtrace_optval_t dts_options[DTRACEOPT_MAX]; /* options */ dtrace_cred_t dts_cred; /* credentials */ size_t dts_nretained; /* number of retained enabs */ + int dts_getf; /* number of getf() calls */ }; struct dtrace_provider { diff --git a/usr/src/uts/common/sys/fs/hyprlofs.h b/usr/src/uts/common/sys/fs/hyprlofs.h new file mode 100644 index 0000000000..b8c4149df2 --- /dev/null +++ b/usr/src/uts/common/sys/fs/hyprlofs.h @@ -0,0 +1,91 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and 
Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_FS_HYPRLOFS_H +#define _SYS_FS_HYPRLOFS_H + +#include <sys/param.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * hyprlofs ioctl numbers. + */ +#define HYPRLOFS_IOC ('H' << 8) + +#define HYPRLOFS_ADD_ENTRIES (HYPRLOFS_IOC | 1) +#define HYPRLOFS_RM_ENTRIES (HYPRLOFS_IOC | 2) +#define HYPRLOFS_RM_ALL (HYPRLOFS_IOC | 3) +#define HYPRLOFS_GET_ENTRIES (HYPRLOFS_IOC | 4) + +typedef struct { + char *hle_path; + uint_t hle_plen; + char *hle_name; + uint_t hle_nlen; +} hyprlofs_entry_t; + +typedef struct { + hyprlofs_entry_t *hle_entries; + uint_t hle_len; +} hyprlofs_entries_t; + +typedef struct { + char hce_path[MAXPATHLEN]; + char hce_name[MAXPATHLEN]; +} hyprlofs_curr_entry_t; + +typedef struct { + hyprlofs_curr_entry_t *hce_entries; + uint_t hce_cnt; +} hyprlofs_curr_entries_t; + +#ifdef _KERNEL +typedef struct { + caddr32_t hle_path; + uint_t hle_plen; + caddr32_t hle_name; + uint_t hle_nlen; +} hyprlofs_entry32_t; + +typedef struct { + caddr32_t hle_entries; + uint_t hle_len; +} hyprlofs_entries32_t; + +typedef struct { + caddr32_t hce_entries; + uint_t hce_cnt; +} hyprlofs_curr_entries32_t; + +#endif /* _KERNEL */ + +#ifdef __cplusplus +} +#endif + +#endif /* 
_SYS_FS_HYPRLOFS_H */ diff --git a/usr/src/uts/common/sys/fs/hyprlofs_info.h b/usr/src/uts/common/sys/fs/hyprlofs_info.h new file mode 100644 index 0000000000..29bdadc4e2 --- /dev/null +++ b/usr/src/uts/common/sys/fs/hyprlofs_info.h @@ -0,0 +1,189 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2012, Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_FS_HLOFS_INFO_H +#define _SYS_FS_HLOFS_INFO_H + +#include <sys/t_lock.h> +#include <vm/seg.h> +#include <vm/seg_vn.h> +#include <sys/vfs_opreg.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * hlnode is the file system dependent node for hyprlofs. + * It is modeled on the tmpfs tmpnode. + * + * hln_rwlock protects access of the directory list at hln_dir + * as well as synchronizing read/writes to directory hlnodes. + * hln_tlock protects updates to hln_mode and hln_nlink. + * hln_tlock doesn't require any hlnode locks. 
+ */ +typedef struct hlnode { + struct hlnode *hln_back; /* linked list of hlnodes */ + struct hlnode *hln_forw; /* linked list of hlnodes */ + union { + struct { + struct hldirent *un_dirlist; /* dirent list */ + uint_t un_dirents; /* number of dirents */ + } un_dirstruct; + vnode_t *un_realvp; /* real vnode */ + } un_hlnode; + vnode_t *hln_vnode; /* vnode for this hlnode */ + int hln_gen; /* pseudo gen num for hlfid */ + int hln_looped; /* flag indicating loopback */ + vattr_t hln_attr; /* attributes */ + krwlock_t hln_rwlock; /* rw - serialize mods and */ + /* directory updates */ + kmutex_t hln_tlock; /* time, flag, and nlink lock */ +} hlnode_t; + +/* + * hyprlofs per-mount data structure. + * All fields are protected by hlm_contents. + */ +typedef struct { + vfs_t *hlm_vfsp; /* filesystem's vfs struct */ + hlnode_t *hlm_rootnode; /* root hlnode */ + char *hlm_mntpath; /* name of hyprlofs mount point */ + dev_t hlm_dev; /* unique dev # of mounted `device' */ + uint_t hlm_gen; /* pseudo generation number for files */ + kmutex_t hlm_contents; /* lock for hlfsmount structure */ +} hlfsmount_t; + +/* + * hyprlofs directories are made up of a linked list of hldirent structures + * hanging off directory hlnodes. File names are not fixed length, + * but are null terminated. 
+ */ +typedef struct hldirent { + hlnode_t *hld_hlnode; /* hlnode for this file */ + struct hldirent *hld_next; /* next directory entry */ + struct hldirent *hld_prev; /* prev directory entry */ + uint_t hld_offset; /* "offset" of dir entry */ + uint_t hld_hash; /* a hash of hld_name */ + struct hldirent *hld_link; /* linked via the hash table */ + hlnode_t *hld_parent; /* parent, dir we are in */ + char *hld_name; /* must be null terminated */ + /* max length is MAXNAMELEN */ +} hldirent_t; + +/* + * hlfid overlays the fid structure (for VFS_VGET) + */ +typedef struct { + uint16_t hlfid_len; + ino32_t hlfid_ino; + int32_t hlfid_gen; +} hlfid_t; + +/* + * File system independent to hyprlofs conversion macros + */ +#define VFSTOHLM(vfsp) ((hlfsmount_t *)(vfsp)->vfs_data) +#define VTOHLM(vp) ((hlfsmount_t *)(vp)->v_vfsp->vfs_data) +#define VTOHLN(vp) ((hlnode_t *)(vp)->v_data) +#define HLNTOV(tp) ((tp)->hln_vnode) +#define REALVP(vp) ((vnode_t *)VTOHLN(vp)->hln_realvp) +#define hlnode_hold(tp) VN_HOLD(HLNTOV(tp)) +#define hlnode_rele(tp) VN_RELE(HLNTOV(tp)) + +#define hln_dir un_hlnode.un_dirstruct.un_dirlist +#define hln_dirents un_hlnode.un_dirstruct.un_dirents +#define hln_realvp un_hlnode.un_realvp + +/* + * Attributes + */ +#define hln_mask hln_attr.va_mask +#define hln_type hln_attr.va_type +#define hln_mode hln_attr.va_mode +#define hln_uid hln_attr.va_uid +#define hln_gid hln_attr.va_gid +#define hln_fsid hln_attr.va_fsid +#define hln_nodeid hln_attr.va_nodeid +#define hln_nlink hln_attr.va_nlink +#define hln_size hln_attr.va_size +#define hln_atime hln_attr.va_atime +#define hln_mtime hln_attr.va_mtime +#define hln_ctime hln_attr.va_ctime +#define hln_rdev hln_attr.va_rdev +#define hln_blksize hln_attr.va_blksize +#define hln_nblocks hln_attr.va_nblocks +#define hln_seq hln_attr.va_seq + +#define HL_MUSTHAVE 1 + +/* + * enums + */ +enum de_op { DE_CREATE, DE_MKDIR }; /* direnter ops */ +enum dr_op { DR_REMOVE, DR_RMDIR }; /* dirremove ops */ + +/* + * 
hyprlofs_minfree is the amount (in pages) of anonymous memory that hyprlofs + * leaves free for the rest of the system. The default value for + * hyprlofs_minfree is btopr(HYPRLOFSMINFREE) but it can be patched to a + * different number of pages. Since hyprlofs doesn't actually use much + * memory, it's unlikely this ever needs to be patched. + */ +#define HYPRLOFSMINFREE 8 * 1024 * 1024 /* 8 Megabytes */ + +extern size_t hyprlofs_minfree; /* Anonymous memory in pages */ + +/* + * hyprlofs can allocate only a certain percentage of kernel memory, + * which is used for hlnodes, directories, file names, etc. + * This is statically set as HYPRLOFSMAXFRACKMEM of physical memory. + * The actual number of allocatable bytes can be patched in hyprlofs_maxkmem. + */ +#define HYPRLOFSMAXFRACKMEM 25 /* 1/25 of physical memory */ + +extern size_t hyprlofs_kmemspace; +extern size_t hyprlofs_maxkmem; /* Allocatable kernel memory in bytes */ + +extern void hyprlofs_node_init(hlfsmount_t *, hlnode_t *, vattr_t *, + cred_t *); +extern int hyprlofs_dirlookup(hlnode_t *, char *, hlnode_t **, cred_t *); +extern int hyprlofs_dirdelete(hlnode_t *, hlnode_t *, char *, enum dr_op, + cred_t *); +extern void hyprlofs_dirinit(hlnode_t *, hlnode_t *); +extern void hyprlofs_dirtrunc(hlnode_t *); +extern void *hyprlofs_memalloc(size_t, int); +extern void hyprlofs_memfree(void *, size_t); +extern int hyprlofs_taccess(void *, int, cred_t *); +extern int hyprlofs_direnter(hlfsmount_t *, hlnode_t *, char *, enum de_op, + vnode_t *, vattr_t *, hlnode_t **, cred_t *); + +extern struct vnodeops *hyprlofs_vnodeops; +extern const struct fs_operation_def hyprlofs_vnodeops_template[]; + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_FS_HLOFS_INFO_H */ diff --git a/usr/src/uts/common/sys/fss.h b/usr/src/uts/common/sys/fss.h index 583586fd75..03c35277d4 100644 --- a/usr/src/uts/common/sys/fss.h +++ b/usr/src/uts/common/sys/fss.h @@ -22,6 +22,7 @@ /* * Copyright 2007 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. + * Copyright 2012 Joyent, Inc. All rights reserved. */ #ifndef _SYS_FSS_H @@ -86,6 +87,7 @@ typedef struct fsspset { /* on the list */ struct fssproj *fssps_list; /* list of project parts */ struct fsszone *fssps_zones; /* list of fsszone_t's in pset */ + uint32_t fssps_gen; /* generation for zone's kstats */ } fsspset_t; /* @@ -101,7 +103,10 @@ typedef struct fssproj { /* protected by fssps_lock */ uint32_t fssp_shares; /* copy of our kpj_shares */ /* protected by fssps_displock */ - uint32_t fssp_ticks; /* total of all ticks */ + uint32_t fssp_ticks; /* total of nice tick values */ + /* protected by fssps_displock */ + uint32_t fssp_tick_cnt; /* cnt of all ticks in this sec */ + uint32_t fssp_shr_pct; /* active shr % in this sec */ /* protected by fssps_displock */ fssusage_t fssp_usage; /* this project's decayed usage */ fssusage_t fssp_shusage; /* normalized usage */ diff --git a/usr/src/uts/common/sys/ipd.h b/usr/src/uts/common/sys/ipd.h new file mode 100644 index 0000000000..901e74f44c --- /dev/null +++ b/usr/src/uts/common/sys/ipd.h @@ -0,0 +1,81 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + +/* + * These definitions are private to ipd and ipdadm. + */ + +#ifndef _SYS_IPD_H +#define _SYS_IPD_H + +#ifdef __cplusplus +extern "C" { +#endif + +#define IPD_DEV_PATH "/dev/ipd" +#define IPD_MAX_DELAY 1000000 /* 1s in us */ + +typedef struct ipd_ioc_perturb { + zoneid_t ipip_zoneid; + uint32_t ipip_arg; +} ipd_ioc_perturb_t; + +typedef struct ipd_ioc_info { + zoneid_t ipii_zoneid; + uint32_t ipii_corrupt; + uint32_t ipii_drop; + uint32_t ipii_delay; +} ipd_ioc_info_t; + +#ifdef _KERNEL + +typedef struct ipd_ioc_list32 { + uint_t ipil_nzones; + caddr32_t ipil_info; +} ipd_ioc_list32_t; + +#endif /* _KERNEL */ + +typedef struct ipd_ioc_list { + uint_t ipil_nzones; + ipd_ioc_info_t *ipil_info; +} ipd_ioc_list_t; + +#define IPD_CORRUPT 0x1 +#define IPD_DELAY 0x2 +#define IPD_DROP 0x4 + +#define IPDIOC (('i' << 24) | ('p' << 16) | ('d' << 8)) +#define IPDIOC_CORRUPT (IPDIOC | 1) /* set packet corruption */ +#define IPDIOC_DELAY (IPDIOC | 2) /* set packet delay */ +#define IPDIOC_DROP (IPDIOC | 3) /* set packet drop */ +#define IPDIOC_LIST (IPDIOC | 4) /* list per-zone ipd info */ +#define IPDIOC_REMOVE (IPDIOC | 5) /* remove perturbations */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_IPD_H */ diff --git a/usr/src/uts/common/sys/mac.h b/usr/src/uts/common/sys/mac.h index 6876fccb1a..220446af65 100644 --- a/usr/src/uts/common/sys/mac.h +++ b/usr/src/uts/common/sys/mac.h @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ #ifndef _SYS_MAC_H @@ -205,6 +206,7 @@ typedef enum { MAC_PROP_MAX_RXHWCLNT_AVAIL, MAC_PROP_MAX_TXHWCLNT_AVAIL, MAC_PROP_IB_LINKMODE, + MAC_PROP_VN_PROMISC_FILTERED, MAC_PROP_PRIVATE = -1 } mac_prop_id_t; diff --git a/usr/src/uts/common/sys/mac_client_impl.h b/usr/src/uts/common/sys/mac_client_impl.h index ae25df6a0d..ec49527300 100644 --- a/usr/src/uts/common/sys/mac_client_impl.h +++ b/usr/src/uts/common/sys/mac_client_impl.h @@ -21,6 +21,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_MAC_CLIENT_IMPL_H @@ -302,6 +303,7 @@ extern int mac_tx_percpu_cnt; /* Mac protection flags */ #define MPT_FLAG_V6_LOCAL_ADDR_SET 0x0001 +#define MPT_FLAG_PROMISC_FILTERED 0x0002 /* in mac_client.c */ extern void mac_promisc_client_dispatch(mac_client_impl_t *, mblk_t *); diff --git a/usr/src/uts/common/sys/mac_impl.h b/usr/src/uts/common/sys/mac_impl.h index 8f9f23ff71..2eef66113d 100644 --- a/usr/src/uts/common/sys/mac_impl.h +++ b/usr/src/uts/common/sys/mac_impl.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ #ifndef _SYS_MAC_IMPL_H @@ -885,6 +886,8 @@ extern void mac_protect_fini(mac_client_impl_t *); extern int mac_set_resources(mac_handle_t, mac_resource_props_t *); extern void mac_get_resources(mac_handle_t, mac_resource_props_t *); extern void mac_get_effective_resources(mac_handle_t, mac_resource_props_t *); +extern void mac_set_promisc_filtered(mac_client_handle_t, boolean_t); +extern boolean_t mac_get_promisc_filtered(mac_client_handle_t); extern cpupart_t *mac_pset_find(mac_resource_props_t *, boolean_t *); extern void mac_set_pool_effective(boolean_t, cpupart_t *, diff --git a/usr/src/uts/common/sys/mman.h b/usr/src/uts/common/sys/mman.h index 6c9119e56d..82344607b0 100644 --- a/usr/src/uts/common/sys/mman.h +++ b/usr/src/uts/common/sys/mman.h @@ -22,6 +22,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 Joyent, Inc. All rights reserved. */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ @@ -353,6 +354,7 @@ struct memcntl_mha32 { #define MS_SYNC 0x4 /* wait for msync */ #define MS_ASYNC 0x1 /* return immediately */ #define MS_INVALIDATE 0x2 /* invalidate caches */ +#define MS_INVALCURPROC 0x8 /* invalidate cache for curproc only */ #if (_POSIX_C_SOURCE <= 2) && !defined(_XPG4_2) || defined(__EXTENSIONS__) /* functions to mctl */ diff --git a/usr/src/uts/common/sys/mntent.h b/usr/src/uts/common/sys/mntent.h index e95ef3fccc..d215d88790 100644 --- a/usr/src/uts/common/sys/mntent.h +++ b/usr/src/uts/common/sys/mntent.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012, Joyent, Inc. All rights reserved. 
* * Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T * All Rights Reserved @@ -47,6 +48,7 @@ extern "C" { #define MNTTYPE_PCFS "pcfs" /* PC (MSDOS) file system */ #define MNTTYPE_PC MNTTYPE_PCFS /* Deprecated name; use MNTTYPE_PCFS */ #define MNTTYPE_LOFS "lofs" /* Loop back file system */ +#define MNTTYPE_HYPRLOFS "hyprlofs" /* Hyperlofs file system */ #define MNTTYPE_LO MNTTYPE_LOFS /* Deprecated name; use MNTTYPE_LOFS */ #define MNTTYPE_HSFS "hsfs" /* High Sierra (9660) file system */ #define MNTTYPE_SWAP "swap" /* Swap file system */ diff --git a/usr/src/uts/common/sys/policy.h b/usr/src/uts/common/sys/policy.h index bcd5ba2b4c..819c788b9e 100644 --- a/usr/src/uts/common/sys/policy.h +++ b/usr/src/uts/common/sys/policy.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, Joyent, Inc. All rights reserved. */ #ifndef _SYS_POLICY_H @@ -171,6 +172,7 @@ int secpolicy_setid_setsticky_clear(vnode_t *, vattr_t *, const vattr_t *, cred_t *); int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t); int secpolicy_xvm_control(const cred_t *); +int secpolicy_hyprlofs_control(const cred_t *); int secpolicy_basic_exec(const cred_t *, vnode_t *); int secpolicy_basic_fork(const cred_t *); diff --git a/usr/src/uts/common/sys/port.h b/usr/src/uts/common/sys/port.h index ccb0308255..d4d74d55ea 100644 --- a/usr/src/uts/common/sys/port.h +++ b/usr/src/uts/common/sys/port.h @@ -24,11 +24,13 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
+ */ + #ifndef _SYS_PORT_H #define _SYS_PORT_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -106,6 +108,7 @@ typedef struct port_notify32 { #define FILE_ACCESS 0x00000001 #define FILE_MODIFIED 0x00000002 #define FILE_ATTRIB 0x00000004 +#define FILE_TRUNC 0x00100000 #define FILE_NOFOLLOW 0x10000000 /* diff --git a/usr/src/uts/common/sys/port_impl.h b/usr/src/uts/common/sys/port_impl.h index 9f3f291874..504fb9ece1 100644 --- a/usr/src/uts/common/sys/port_impl.h +++ b/usr/src/uts/common/sys/port_impl.h @@ -24,6 +24,10 @@ * Use is subject to license terms. */ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + #ifndef _SYS_PORT_IMPL_H #define _SYS_PORT_IMPL_H @@ -311,6 +315,7 @@ typedef struct portfop_vp { #define FOP_FILE_SETATTR_MTIME 0x00080000 #define FOP_FILE_SETATTR_CTIME 0x00100000 #define FOP_FILE_LINK_SRC 0x00200000 +#define FOP_FILE_TRUNC 0x00400000 /* * File modification event. @@ -339,10 +344,15 @@ typedef struct portfop_vp { /* + * File trunc event + */ +#define FOP_TRUNC_MASK (FOP_FILE_TRUNC|FOP_FILE_CREATE) + +/* * valid watchable events */ #define FILE_EVENTS_MASK (FILE_ACCESS|FILE_MODIFIED|FILE_ATTRIB \ - |FILE_NOFOLLOW) + |FILE_NOFOLLOW|FILE_TRUNC) /* --- End file events --- */ /* diff --git a/usr/src/uts/common/sys/procfs.h b/usr/src/uts/common/sys/procfs.h index 0c4a48fcdd..f592fd9dcf 100644 --- a/usr/src/uts/common/sys/procfs.h +++ b/usr/src/uts/common/sys/procfs.h @@ -65,10 +65,6 @@ extern "C" { #include <sys/stat.h> #include <sys/param.h> -#if !defined(_LP64) && _FILE_OFFSET_BITS == 64 -#error "Cannot use procfs in the large file compilation environment" -#endif - /* * System call interfaces for /proc. */ diff --git a/usr/src/uts/common/sys/sdt_impl.h b/usr/src/uts/common/sys/sdt_impl.h index cbe95f7c66..f7cc683f2f 100644 --- a/usr/src/uts/common/sys/sdt_impl.h +++ b/usr/src/uts/common/sys/sdt_impl.h @@ -24,11 +24,13 @@ * Use is subject to license terms. 
*/ +/* + * Copyright (c) 2012, Joyent, Inc. All rights reserved. + */ + #ifndef _SYS_SDT_IMPL_H #define _SYS_SDT_IMPL_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -45,6 +47,7 @@ typedef struct sdt_provider { char *sdtp_name; /* name of provider */ char *sdtp_prefix; /* prefix for probe names */ dtrace_pattr_t *sdtp_attr; /* stability attributes */ + uint32_t sdtp_priv; /* privilege, if any */ dtrace_provider_id_t sdtp_id; /* provider ID */ } sdt_provider_t; @@ -75,6 +78,7 @@ typedef struct sdt_argdesc { } sdt_argdesc_t; extern void sdt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); +extern int sdt_mode(void *, dtrace_id_t, void *); #ifdef __cplusplus } diff --git a/usr/src/uts/common/sys/thread.h b/usr/src/uts/common/sys/thread.h index 188230d61e..c7f460e7c7 100644 --- a/usr/src/uts/common/sys/thread.h +++ b/usr/src/uts/common/sys/thread.h @@ -68,6 +68,8 @@ typedef struct ctxop { void (*free_op)(void *, int); /* function which frees the context */ void *arg; /* argument to above functions, ctx pointer */ struct ctxop *next; /* next context ops */ + hrtime_t save_ts; /* timestamp of last save */ + hrtime_t restore_ts; /* timestamp of last restore */ } ctxop_t; /* diff --git a/usr/src/uts/common/sys/uadmin.h b/usr/src/uts/common/sys/uadmin.h index c35d0a5cfb..6adeb477bb 100644 --- a/usr/src/uts/common/sys/uadmin.h +++ b/usr/src/uts/common/sys/uadmin.h @@ -21,6 +21,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2011 Joyent, Inc. All rights reserved. 
*/ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -157,7 +158,7 @@ extern kmutex_t ualock; extern void mdboot(int, int, char *, boolean_t); extern void mdpreboot(int, int, char *); extern int kadmin(int, int, void *, cred_t *); -extern void killall(zoneid_t); +extern void killall(zoneid_t, boolean_t); #endif #if defined(__STDC__) diff --git a/usr/src/uts/common/sys/vm_usage.h b/usr/src/uts/common/sys/vm_usage.h index 1aa4a8ee6d..97e3430ae2 100644 --- a/usr/src/uts/common/sys/vm_usage.h +++ b/usr/src/uts/common/sys/vm_usage.h @@ -21,6 +21,7 @@ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2012 Joyent, Inc. All rights reserved. */ #ifndef _SYS_VM_USAGE_H @@ -79,8 +80,9 @@ extern "C" { /* zoneid */ #define VMUSAGE_COL_EUSERS 0x2000 /* same as VMUSAGE_COL_RUSERS, but by */ /* euser */ +#define VMUSAGE_A_ZONE 0x4000 /* rss/swap for a specified zone */ -#define VMUSAGE_MASK 0x3fff /* all valid flags for getvmusage() */ +#define VMUSAGE_MASK 0x7fff /* all valid flags for getvmusage() */ typedef struct vmusage { id_t vmu_zoneid; /* zoneid, or ALL_ZONES for */ diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h index 3ba7bf47f4..a44930c853 100644 --- a/usr/src/uts/common/sys/zone.h +++ b/usr/src/uts/common/sys/zone.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ #ifndef _SYS_ZONE_H @@ -94,12 +95,17 @@ extern "C" { #define ZONE_ATTR_INITNAME 9 #define ZONE_ATTR_BOOTARGS 10 #define ZONE_ATTR_BRAND 11 -#define ZONE_ATTR_PHYS_MCAP 12 +#define ZONE_ATTR_PMCAP_NOVER 12 #define ZONE_ATTR_SCHED_CLASS 13 #define ZONE_ATTR_FLAGS 14 #define ZONE_ATTR_HOSTID 15 #define ZONE_ATTR_FS_ALLOWED 16 #define ZONE_ATTR_NETWORK 17 +#define ZONE_ATTR_DID 18 +#define ZONE_ATTR_PMCAP_PAGEOUT 19 +#define ZONE_ATTR_INITNORESTART 20 +#define ZONE_ATTR_PG_FLT_DELAY 21 +#define ZONE_ATTR_RSS 22 /* Start of the brand-specific attribute namespace */ #define ZONE_ATTR_BRAND_ATTRS 32768 @@ -180,6 +186,7 @@ typedef struct { uint32_t doi; /* DOI for label */ caddr32_t label; /* label associated with zone */ int flags; + zoneid_t zoneid; /* requested zoneid */ } zone_def32; #endif typedef struct { @@ -196,6 +203,7 @@ typedef struct { uint32_t doi; /* DOI for label */ const bslabel_t *label; /* label associated with zone */ int flags; + zoneid_t zoneid; /* requested zoneid */ } zone_def; /* extended error information */ @@ -240,7 +248,7 @@ typedef enum zone_cmd { typedef struct zone_cmd_arg { uint64_t uniqid; /* unique "generation number" */ zone_cmd_t cmd; /* requested action */ - uint32_t _pad; /* need consistent 32/64 bit alignmt */ + uint32_t debug; /* enable brand hook debug */ char locale[MAXPATHLEN]; /* locale in which to render messages */ char bootbuf[BOOTARGS_MAX]; /* arguments passed to zone_boot() */ } zone_cmd_arg_t; @@ -320,6 +328,7 @@ typedef struct zone_net_data { * libraries which may be defining ther own versions. 
libraries which may be defining their own versions. */ #include <sys/list.h> +#include <sys/cpuvar.h> #define GLOBAL_ZONEUNIQID 0 /* uniqid of the global zone */ @@ -367,7 +376,7 @@ typedef struct zone_dataset { } zone_dataset_t; /* - * structure for zone kstats + * structure for rctl zone kstats */ typedef struct zone_kstat { kstat_named_t zk_zonename; @@ -377,6 +386,73 @@ typedef struct zone_kstat { struct cpucap; +typedef struct { + hrtime_t cycle_start; + uint_t cycle_cnt; + hrtime_t zone_avg_cnt; +} sys_zio_cntr_t; + +typedef struct { + kstat_named_t zv_zonename; + kstat_named_t zv_nread; + kstat_named_t zv_reads; + kstat_named_t zv_rtime; + kstat_named_t zv_rlentime; + kstat_named_t zv_nwritten; + kstat_named_t zv_writes; + kstat_named_t zv_wtime; + kstat_named_t zv_wlentime; + kstat_named_t zv_10ms_ops; + kstat_named_t zv_100ms_ops; + kstat_named_t zv_1s_ops; + kstat_named_t zv_10s_ops; + kstat_named_t zv_delay_cnt; + kstat_named_t zv_delay_time; +} zone_vfs_kstat_t; + +typedef struct { + kstat_named_t zz_zonename; + kstat_named_t zz_nread; + kstat_named_t zz_reads; + kstat_named_t zz_rtime; + kstat_named_t zz_rlentime; + kstat_named_t zz_nwritten; + kstat_named_t zz_writes; + kstat_named_t zz_waittime; +} zone_zfs_kstat_t; + +typedef struct { + kstat_named_t zm_zonename; + kstat_named_t zm_rss; + kstat_named_t zm_phys_cap; + kstat_named_t zm_swap; + kstat_named_t zm_swap_cap; + kstat_named_t zm_nover; + kstat_named_t zm_pagedout; + kstat_named_t zm_pgpgin; + kstat_named_t zm_anonpgin; + kstat_named_t zm_execpgin; + kstat_named_t zm_fspgin; + kstat_named_t zm_anon_alloc_fail; + kstat_named_t zm_pf_throttle; + kstat_named_t zm_pf_throttle_usec; +} zone_mcap_kstat_t; + +typedef struct { + kstat_named_t zm_zonename; /* full name, kstat truncates name */ + kstat_named_t zm_utime; + kstat_named_t zm_stime; + kstat_named_t zm_wtime; + kstat_named_t zm_avenrun1; + kstat_named_t zm_avenrun5; + kstat_named_t zm_avenrun15; + kstat_named_t zm_run_ticks; + kstat_named_t 
zm_run_wait; + kstat_named_t zm_fss_shr_pct; + 
kstat_named_t zm_fss_pri_hi; + kstat_named_t zm_fss_pri_avg; +} zone_misc_kstat_t; + typedef struct zone { /* * zone_name is never modified once set. @@ -416,6 +492,7 @@ typedef struct zone { */ list_node_t zone_linkage; zoneid_t zone_id; /* ID of zone */ + zoneid_t zone_did; /* persistent debug ID of zone */ uint_t zone_ref; /* count of zone_hold()s on zone */ uint_t zone_cred_ref; /* count of zone_hold_cred()s on zone */ /* @@ -471,7 +548,7 @@ typedef struct zone { char *zone_initname; /* fs path to 'init' */ int zone_boot_err; /* for zone_boot() if boot fails */ char *zone_bootargs; /* arguments passed via zone_boot() */ - uint64_t zone_phys_mcap; /* physical memory cap */ + rctl_qty_t zone_phys_mem_ctl; /* current phys. memory limit */ /* * zone_kthreads is protected by zone_status_lock. */ @@ -490,6 +567,9 @@ typedef struct zone { hrtime_t zone_pool_mod; /* last pool bind modification time */ /* zone_psetid is protected by cpu_lock */ psetid_t zone_psetid; /* pset the zone is bound to */ + + time_t zone_boot_time; /* Similar to boot_time */ + /* * The following two can be read without holding any locks. They are * updated under cpu_lock. @@ -517,6 +597,37 @@ typedef struct zone { list_t zone_dl_list; netstack_t *zone_netstack; struct cpucap *zone_cpucap; /* CPU caps data */ + + /* + * Data and counters used for ZFS fair-share disk IO. + */ + rctl_qty_t zone_zfs_io_pri; /* ZFS IO priority */ + uint_t zone_zfs_queued; /* enqueued count */ + uint64_t zone_zfs_weight; /* used to prevent starvation */ + uint64_t zone_io_util; /* IO utilization metric */ + boolean_t zone_io_util_above_avg; /* IO util percent > avg. */ + uint16_t zone_io_delay; /* IO delay on logical r/w */ + kmutex_t zone_stg_io_lock; /* protects IO window data */ + sys_zio_cntr_t zone_rd_ops; /* Counters for ZFS reads, */ + sys_zio_cntr_t zone_wr_ops; /* writes and logical writes. */ + sys_zio_cntr_t zone_lwr_ops; + + /* + * kstats and counters for VFS ops and bytes. 
+ */ + kmutex_t zone_vfs_lock; /* protects VFS statistics */ + kstat_t *zone_vfs_ksp; + kstat_io_t zone_vfs_rwstats; + zone_vfs_kstat_t *zone_vfs_stats; + + /* + * kstats for ZFS I/O ops and bytes. + */ + kmutex_t zone_zfs_lock; /* protects ZFS statistics */ + kstat_t *zone_zfs_ksp; + kstat_io_t zone_zfs_rwstats; + zone_zfs_kstat_t *zone_zfs_stats; + /* * Solaris Auditing per-zone audit context */ @@ -534,6 +645,69 @@ typedef struct zone { rctl_qty_t zone_nprocs_ctl; /* current limit protected by */ /* zone_rctls->rcs_lock */ kstat_t *zone_nprocs_kstat; + + /* + * kstats and counters for physical memory capping. + */ + rctl_qty_t zone_phys_mem; /* current bytes of phys. mem. (RSS) */ + kstat_t *zone_physmem_kstat; + uint64_t zone_mcap_nover; /* # of times over phys. cap */ + uint64_t zone_mcap_pagedout; /* bytes of mem. paged out */ + kmutex_t zone_mcap_lock; /* protects mcap statistics */ + kstat_t *zone_mcap_ksp; + zone_mcap_kstat_t *zone_mcap_stats; + uint64_t zone_pgpgin; /* pages paged in */ + uint64_t zone_anonpgin; /* anon pages paged in */ + uint64_t zone_execpgin; /* exec pages paged in */ + uint64_t zone_fspgin; /* fs pages paged in */ + uint64_t zone_anon_alloc_fail; /* cnt of anon alloc fails */ + uint64_t zone_pf_throttle; /* cnt of page flt throttles */ + uint64_t zone_pf_throttle_usec; /* time of page flt throttles */ + + /* Num usecs to throttle page fault when zone is over phys. mem cap */ + uint32_t zone_pg_flt_delay; + + /* + * Misc. kstats and counters for zone cpu-usage aggregation. + * The zone_Xtime values are the sum of the micro-state accounting + * values for all threads that are running or have run in the zone. + * This is tracked in msacct.c as threads change state. + * The zone_stime is the sum of the LMS_SYSTEM times. + * The zone_utime is the sum of the LMS_USER times. + * The zone_wtime is the sum of the LMS_WAIT_CPU times. + * As with per-thread micro-state accounting values, these values are + * not scaled to nanosecs. 
The scaling is done by the + * zone_misc_kstat_update function when kstats are requested. + */ + kmutex_t zone_misc_lock; /* protects misc statistics */ + kstat_t *zone_misc_ksp; + zone_misc_kstat_t *zone_misc_stats; + uint64_t zone_stime; /* total system time */ + uint64_t zone_utime; /* total user time */ + uint64_t zone_wtime; /* total time waiting in runq */ + + struct loadavg_s zone_loadavg; /* loadavg for this zone */ + uint64_t zone_hp_avenrun[3]; /* high-precision avenrun */ + int zone_avenrun[3]; /* FSCALED avg. run queue len */ + + /* + * FSS stats updated once per second by fss_decay_usage. + */ + uint32_t zone_fss_gen; /* FSS generation cntr */ + uint64_t zone_run_ticks; /* tot # of ticks running */ + + /* + * DTrace-private per-zone state + */ + int zone_dtrace_getf; /* # of unprivileged getf()s */ + + /* + * Synchronization primitives used to synchronize between mounts and + * zone creation/destruction. + */ + int zone_mounts_in_progress; + kcondvar_t zone_mount_cv; + kmutex_t zone_mount_lock; } zone_t; /* @@ -566,9 +740,11 @@ extern zone_t *zone_find_by_name(char *); extern zone_t *zone_find_by_any_path(const char *, boolean_t); extern zone_t *zone_find_by_path(const char *); extern zoneid_t getzoneid(void); +extern zoneid_t getzonedid(void); extern zone_t *zone_find_by_id_nolock(zoneid_t); extern int zone_datalink_walk(zoneid_t, int (*)(datalink_id_t, void *), void *); extern int zone_check_datalink(zoneid_t *, datalink_id_t); +extern void zone_loadavg_update(); /* * Zone-specific data (ZSD) APIs @@ -752,13 +928,14 @@ extern int zone_dataset_visible(const char *, int *); extern int zone_kadmin(int, int, const char *, cred_t *); extern void zone_shutdown_global(void); -extern void mount_in_progress(void); -extern void mount_completed(void); +extern void mount_in_progress(zone_t *); +extern void mount_completed(zone_t *); extern int zone_walk(int (*)(zone_t *, void *), void *); extern rctl_hndl_t rc_zone_locked_mem; extern rctl_hndl_t 
rc_zone_max_swap; +extern rctl_hndl_t rc_zone_phys_mem; extern rctl_hndl_t rc_zone_max_lofi; #endif /* _KERNEL */ |