summaryrefslogtreecommitdiff
path: root/usr/src
diff options
context:
space:
mode:
authorBryan Cantrill <bryan@joyent.com>2012-06-04 06:54:42 +0000
committerRichard Lowe <richlowe@richlowe.net>2014-02-25 12:53:02 -0500
commitb0f673c4626e4cb1db7785287eaeed2731dfefe8 (patch)
treed75fad195a057999974278cf1742080d6eebf350 /usr/src
parent3c7284bd3243d42a710edac3a15f6019b4c849be (diff)
downloadillumos-gate-b0f673c4626e4cb1db7785287eaeed2731dfefe8.tar.gz
2915 DTrace in a zone should see "cpu", "curpsinfo", et al
2916 DTrace in a zone should be able to access fds[] 2917 DTrace in a zone should have limited provider access Reviewed by: Joshua M. Clulow <josh@sysmgr.org> Reviewed by: Adam Leventhal <ahl@delphix.com> Approved by: Gordon Ross <gwr@nexenta.com>
Diffstat (limited to 'usr/src')
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/aggs/tst.subr.d1
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/privs/tst.fds.ksh91
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/privs/tst.getf.ksh98
-rwxr-xr-xusr/src/cmd/dtrace/test/tst/common/privs/tst.procpriv.ksh138
-rw-r--r--usr/src/cmd/dtrace/test/tst/common/privs/tst.providers.ksh126
-rw-r--r--usr/src/lib/libdtrace/common/dt_open.c10
-rw-r--r--usr/src/lib/libdtrace/common/io.d.in8
-rw-r--r--usr/src/pkg/manifests/system-dtrace-tests.mf4
-rw-r--r--usr/src/uts/common/dtrace/dtrace.c277
-rw-r--r--usr/src/uts/common/dtrace/sdt_subr.c55
-rw-r--r--usr/src/uts/common/os/dtrace_subr.c1
-rw-r--r--usr/src/uts/common/os/fio.c14
-rw-r--r--usr/src/uts/common/sys/dtrace.h28
-rw-r--r--usr/src/uts/common/sys/dtrace_impl.h2
-rw-r--r--usr/src/uts/common/sys/sdt_impl.h8
-rw-r--r--usr/src/uts/common/sys/zone.h5
-rw-r--r--usr/src/uts/intel/dtrace/sdt.c17
-rw-r--r--usr/src/uts/sparc/dtrace/sdt.c17
18 files changed, 814 insertions, 86 deletions
diff --git a/usr/src/cmd/dtrace/test/tst/common/aggs/tst.subr.d b/usr/src/cmd/dtrace/test/tst/common/aggs/tst.subr.d
index bb0739f0d3..b9b1ebdb49 100644
--- a/usr/src/cmd/dtrace/test/tst/common/aggs/tst.subr.d
+++ b/usr/src/cmd/dtrace/test/tst/common/aggs/tst.subr.d
@@ -98,6 +98,7 @@ STRFUNC(inet_ntoa6((in6_addr_t *)alloca(sizeof (in6_addr_t))))
STRFUNC(inet_ntop(AF_INET, (void *)alloca(sizeof (ipaddr_t))))
STRFUNC(toupper("foo"))
STRFUNC(tolower("BAR"))
+INTFUNC(getf(0))
BEGIN
/subr == DIF_SUBR_MAX + 1/
diff --git a/usr/src/cmd/dtrace/test/tst/common/privs/tst.fds.ksh b/usr/src/cmd/dtrace/test/tst/common/privs/tst.fds.ksh
new file mode 100644
index 0000000000..a5aa27137e
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/privs/tst.fds.ksh
@@ -0,0 +1,91 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+#
+
+tmpin=/tmp/tst.fds.$$.d
+tmpout1=/tmp/tst.fds.$$.out1
+tmpout2=/tmp/tst.fds.$$.out2
+
+cat > $tmpin <<EOF
+#define DUMPFIELD(fd, fmt, field) \
+ errmsg = "could not dump field"; \
+ printf("%d: field =fmt\n", fd, fds[fd].field);
+
+/*
+ * Note that we are explicitly not looking at fi_mount -- it (by design) does
+ * not work if not running with kernel permissions.
+ */
+#define DUMP(fd) \
+ DUMPFIELD(fd, %s, fi_name); \
+ DUMPFIELD(fd, %s, fi_dirname); \
+ DUMPFIELD(fd, %s, fi_pathname); \
+ DUMPFIELD(fd, %d, fi_offset); \
+ DUMPFIELD(fd, %s, fi_fs); \
+ DUMPFIELD(fd, %o, fi_oflags);
+
+BEGIN
+{
+ DUMP(0);
+ DUMP(1);
+ DUMP(2);
+ DUMP(3);
+ DUMP(4);
+ exit(0);
+}
+
+ERROR
+{
+ printf("error: %s\n", errmsg);
+ exit(1);
+}
+EOF
+
+#
+# First, with all privs
+#
+/usr/sbin/dtrace -q -Cs /dev/stdin < $tmpin > $tmpout2
+mv $tmpout2 $tmpout1
+
+#
+# And now with only dtrace_proc and dtrace_user -- the output should be
+# identical.
+#
+ppriv -s A=basic,dtrace_proc,dtrace_user $$
+
+/usr/sbin/dtrace -q -Cs /dev/stdin < $tmpin > $tmpout2
+
+echo ">>> $tmpout1"
+cat $tmpout1
+
+echo ">>> $tmpout2"
+cat $tmpout2
+
+rval=0
+
+if ! cmp $tmpout1 $tmpout2 ; then
+ rval=1
+fi
+
+rm $tmpout1 $tmpout2 $tmpin
+exit $rval
diff --git a/usr/src/cmd/dtrace/test/tst/common/privs/tst.getf.ksh b/usr/src/cmd/dtrace/test/tst/common/privs/tst.getf.ksh
new file mode 100644
index 0000000000..7dbb83fba9
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/privs/tst.getf.ksh
@@ -0,0 +1,98 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+#
+
+ppriv -s A=basic,dtrace_proc,dtrace_user $$
+
+/usr/sbin/dtrace -q -Cs /dev/stdin <<EOF
+
+#define CANREAD(field) \
+ BEGIN { this->fp = getf(0); errmsg = "can't read field"; \
+ printf("field: "); trace(this->fp->field); printf("\n"); }
+
+#define CANTREAD(field) \
+ BEGIN { errmsg = ""; this->fp = getf(0); trace(this->fp->field); \
+ printf("\nable to successfully read field!"); exit(1); }
+
+CANREAD(f_flag)
+CANREAD(f_flag2)
+CANREAD(f_vnode)
+CANREAD(f_offset)
+CANREAD(f_cred)
+CANREAD(f_audit_data)
+CANREAD(f_count)
+
+/*
+ * We can potentially read parts of our cred, but we can't dereference
+ * through cr_zone.
+ */
+CANTREAD(f_cred->cr_zone->zone_id)
+
+CANREAD(f_vnode->v_path)
+CANREAD(f_vnode->v_op)
+CANREAD(f_vnode->v_op->vnop_name)
+
+CANTREAD(f_vnode->v_flag)
+CANTREAD(f_vnode->v_count)
+CANTREAD(f_vnode->v_pages)
+CANTREAD(f_vnode->v_type)
+CANTREAD(f_vnode->v_vfsmountedhere)
+CANTREAD(f_vnode->v_op->vop_open)
+
+BEGIN
+{
+ errmsg = "";
+ this->fp = getf(0);
+ this->fp2 = getf(1);
+
+ trace(this->fp->f_vnode);
+ printf("\nable to successfully read this->fp!");
+ exit(1);
+}
+
+BEGIN
+{
+ errmsg = "";
+ this->fp = getf(0);
+}
+
+BEGIN
+{
+ trace(this->fp->f_vnode);
+ printf("\nable to successfully read this->fp from prior clause!");
+}
+
+BEGIN
+{
+ exit(0);
+}
+
+ERROR
+/errmsg != ""/
+{
+ printf("fatal error: %s", errmsg);
+ exit(1);
+}
+
+EOF
diff --git a/usr/src/cmd/dtrace/test/tst/common/privs/tst.procpriv.ksh b/usr/src/cmd/dtrace/test/tst/common/privs/tst.procpriv.ksh
new file mode 100755
index 0000000000..7022566391
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/privs/tst.procpriv.ksh
@@ -0,0 +1,138 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+#
+
+ppriv -s A=basic,dtrace_proc,dtrace_user $$
+
+#
+# When we have dtrace_proc (but lack dtrace_kernel), we expect to be able to
+# read certain curpsinfo/curlwpsinfo/curcpu fields even though they require
+# reading in-kernel state. However, there are other fields in these translated
+# structures that we know we shouldn't be able to read, as they require reading
+# in-kernel state that we cannot read with only dtrace_proc. Finally, there
+# are a few fields that we may or may not be able to read depending on the
+# specifics of context. This test therefore asserts that we can read what we
+# think we should be able to, that we can't read what we think we shouldn't be
+# able to, and (for purposes of completeness) that we are indifferent about
+# what we cannot assert one way or the other.
+#
+/usr/sbin/dtrace -q -Cs /dev/stdin <<EOF
+
+#define CANREAD(what, field) \
+ BEGIN { errmsg = "can't read field from what"; printf("field: "); \
+ trace(what->field); printf("\n"); }
+
+#define CANTREAD(what, field) \
+ BEGIN { errmsg = ""; trace(what->field); \
+ printf("\nable to successfully read field from what!"); exit(1); }
+
+#define MIGHTREAD(what, field) \
+ BEGIN { errmsg = ""; printf("field: "); trace(what->field); printf("\n"); }
+
+#define CANREADVAR(vname) \
+ BEGIN { errmsg = "can't read vname"; printf("vname: "); \
+ trace(vname); printf("\n"); }
+
+#define CANTREADVAR(vname) \
+ BEGIN { errmsg = ""; trace(vname); \
+ printf("\nable to successfully read vname!"); exit(1); }
+
+#define MIGHTREADVAR(vname) \
+ BEGIN { errmsg = ""; printf("vname: "); trace(vname); printf("\n"); }
+
+CANREAD(curpsinfo, pr_pid)
+CANREAD(curpsinfo, pr_nlwp)
+CANREAD(curpsinfo, pr_ppid)
+CANREAD(curpsinfo, pr_uid)
+CANREAD(curpsinfo, pr_euid)
+CANREAD(curpsinfo, pr_gid)
+CANREAD(curpsinfo, pr_egid)
+CANREAD(curpsinfo, pr_addr)
+CANREAD(curpsinfo, pr_start)
+CANREAD(curpsinfo, pr_fname)
+CANREAD(curpsinfo, pr_psargs)
+CANREAD(curpsinfo, pr_argc)
+CANREAD(curpsinfo, pr_argv)
+CANREAD(curpsinfo, pr_envp)
+CANREAD(curpsinfo, pr_dmodel)
+
+/*
+ * If our p_pgidp points to the same pid structure as our p_pidp, we will
+ * be able to read pr_pgid -- but we won't if not.
+ */
+MIGHTREAD(curpsinfo, pr_pgid)
+
+CANTREAD(curpsinfo, pr_sid)
+CANTREAD(curpsinfo, pr_ttydev)
+CANTREAD(curpsinfo, pr_projid)
+CANTREAD(curpsinfo, pr_zoneid)
+CANTREAD(curpsinfo, pr_contract)
+
+CANREAD(curlwpsinfo, pr_flag)
+CANREAD(curlwpsinfo, pr_lwpid)
+CANREAD(curlwpsinfo, pr_addr)
+CANREAD(curlwpsinfo, pr_wchan)
+CANREAD(curlwpsinfo, pr_stype)
+CANREAD(curlwpsinfo, pr_state)
+CANREAD(curlwpsinfo, pr_sname)
+CANREAD(curlwpsinfo, pr_syscall)
+CANREAD(curlwpsinfo, pr_pri)
+CANREAD(curlwpsinfo, pr_onpro)
+CANREAD(curlwpsinfo, pr_bindpro)
+CANREAD(curlwpsinfo, pr_bindpset)
+
+CANTREAD(curlwpsinfo, pr_clname)
+CANTREAD(curlwpsinfo, pr_lgrp)
+
+CANREAD(curcpu, cpu_id)
+
+CANTREAD(curcpu, cpu_pset)
+CANTREAD(curcpu, cpu_chip)
+CANTREAD(curcpu, cpu_lgrp)
+CANTREAD(curcpu, cpu_info)
+
+/*
+ * We cannot assert one thing or another about the variable "root": for those
+ * with only dtrace_proc, it will be readable in the global but not readable in
+ * the non-global.
+ */
+MIGHTREADVAR(root)
+
+CANREADVAR(cpu)
+CANTREADVAR(pset)
+CANTREADVAR(cwd)
+CANTREADVAR(chip)
+CANTREADVAR(lgrp)
+
+BEGIN
+{
+ exit(0);
+}
+
+ERROR
+/errmsg != ""/
+{
+ printf("fatal error: %s", errmsg);
+ exit(1);
+}
diff --git a/usr/src/cmd/dtrace/test/tst/common/privs/tst.providers.ksh b/usr/src/cmd/dtrace/test/tst/common/privs/tst.providers.ksh
new file mode 100644
index 0000000000..94c3722f78
--- /dev/null
+++ b/usr/src/cmd/dtrace/test/tst/common/privs/tst.providers.ksh
@@ -0,0 +1,126 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012, Joyent, Inc. All rights reserved.
+#
+
+#
+# First, make sure that we can successfully enable the io provider
+#
+if ! dtrace -P io -n BEGIN'{exit(0)}' > /dev/null 2>&1 ; then
+ echo failed to enable io provider with full privs
+ exit 1
+fi
+
+ppriv -s A=basic,dtrace_proc,dtrace_user $$
+
+#
+# Now make sure that we cannot enable the io provider with reduced privs
+#
+if ! dtrace -x errtags -P io -n BEGIN'{exit(1)}' 2>&1 | \
+ grep D_PDESC_ZERO > /dev/null 2>&1 ; then
+ echo successfully enabled the io provider with reduced privs
+ exit 1
+fi
+
+#
+# Keeping our reduced privs, we want to assure that we can see every provider
+# that we think we should be able to see -- and that we can see curpsinfo
+# state but can't otherwise see arguments.
+#
+/usr/sbin/dtrace -wq -Cs /dev/stdin <<EOF
+
+int seen[string];
+int err;
+
+#define CANENABLE(provider) \
+provider::: \
+/err == 0 && progenyof(\$pid) && !seen["provider"]/ \
+{ \
+ trace(arg0); \
+ printf("\nsuccessful trace of arg0 in %s:%s:%s:%s\n", \
+ probeprov, probemod, probefunc, probename); \
+ exit(++err); \
+} \
+ \
+provider::: \
+/progenyof(\$pid)/ \
+{ \
+ seen["provider"]++; \
+} \
+ \
+provider::: \
+/progenyof(\$pid)/ \
+{ \
+ errstr = "provider"; \
+ this->ignore = stringof(curpsinfo->pr_psargs); \
+ errstr = ""; \
+} \
+ \
+END \
+/err == 0 && !seen["provider"]/ \
+{ \
+ printf("no probes from provider\n"); \
+ exit(++err); \
+} \
+ \
+END \
+/err == 0/ \
+{ \
+ printf("saw %d probes from provider\n", seen["provider"]); \
+}
+
+CANENABLE(proc)
+CANENABLE(sched)
+CANENABLE(vminfo)
+CANENABLE(sysinfo)
+
+BEGIN
+{
+ /*
+ * We'll kick off a system of a do-nothing command -- which should be
+ * enough to kick proc, sched, vminfo and sysinfo probes.
+ */
+ system("echo > /dev/null");
+}
+
+ERROR
+/err == 0 && errstr != ""/
+{
+ printf("fatal error: couldn't read curpsinfo->pr_psargs in ");
+ printf("%s-provided probe\n", errstr);
+ exit(++err);
+}
+
+proc:::exit
+/progenyof(\$pid)/
+{
+ exit(0);
+}
+
+tick-10ms
+/i++ > 500/
+{
+ printf("exit probe did not seem to fire\n");
+ exit(++err);
+}
+EOF
diff --git a/usr/src/lib/libdtrace/common/dt_open.c b/usr/src/lib/libdtrace/common/dt_open.c
index dd83370ee8..2c6640e82f 100644
--- a/usr/src/lib/libdtrace/common/dt_open.c
+++ b/usr/src/lib/libdtrace/common/dt_open.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@@ -111,8 +111,9 @@
#define DT_VERS_1_8_1 DT_VERSION_NUMBER(1, 8, 1)
#define DT_VERS_1_9 DT_VERSION_NUMBER(1, 9, 0)
#define DT_VERS_1_9_1 DT_VERSION_NUMBER(1, 9, 1)
-#define DT_VERS_LATEST DT_VERS_1_9_1
-#define DT_VERS_STRING "Sun D 1.9.1"
+#define DT_VERS_1_10 DT_VERSION_NUMBER(1, 10, 0)
+#define DT_VERS_LATEST DT_VERS_1_10
+#define DT_VERS_STRING "Sun D 1.10"
const dt_version_t _dtrace_versions[] = {
DT_VERS_1_0, /* D API 1.0.0 (PSARC 2001/466) Solaris 10 FCS */
@@ -134,6 +135,7 @@ const dt_version_t _dtrace_versions[] = {
DT_VERS_1_8_1, /* D API 1.8.1 */
DT_VERS_1_9, /* D API 1.9 */
DT_VERS_1_9_1, /* D API 1.9.1 */
+ DT_VERS_1_10, /* D API 1.10 */
0
};
@@ -247,6 +249,8 @@ static const dt_ident_t _dtrace_globals[] = {
&dt_idops_func, "uint64_t(uint64_t)" },
{ "htons", DT_IDENT_FUNC, 0, DIF_SUBR_HTONS, DT_ATTR_EVOLCMN, DT_VERS_1_3,
&dt_idops_func, "uint16_t(uint16_t)" },
+{ "getf", DT_IDENT_FUNC, 0, DIF_SUBR_GETF, DT_ATTR_STABCMN, DT_VERS_1_10,
+ &dt_idops_func, "file_t *(int)" },
{ "gid", DT_IDENT_SCALAR, 0, DIF_VAR_GID, DT_ATTR_STABCMN, DT_VERS_1_0,
&dt_idops_type, "gid_t" },
{ "id", DT_IDENT_SCALAR, 0, DIF_VAR_ID, DT_ATTR_STABCMN, DT_VERS_1_0,
diff --git a/usr/src/lib/libdtrace/common/io.d.in b/usr/src/lib/libdtrace/common/io.d.in
index cb1e32cffe..5f676b9546 100644
--- a/usr/src/lib/libdtrace/common/io.d.in
+++ b/usr/src/lib/libdtrace/common/io.d.in
@@ -23,7 +23,9 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
#pragma D depends_on module unix
#pragma D depends_on provider io
@@ -197,9 +199,7 @@ translator fileinfo_t < struct file *F > {
fi_oflags = F == NULL ? 0 : F->f_flag + (int)@FOPEN@;
};
-inline fileinfo_t fds[int fd] = xlate <fileinfo_t> (
- fd >= 0 && fd < curthread->t_procp->p_user.u_finfo.fi_nfiles ?
- curthread->t_procp->p_user.u_finfo.fi_list[fd].uf_file : NULL);
+inline fileinfo_t fds[int fd] = xlate <fileinfo_t> (getf(fd));
#pragma D attributes Stable/Stable/Common fds
#pragma D binding "1.1" fds
diff --git a/usr/src/pkg/manifests/system-dtrace-tests.mf b/usr/src/pkg/manifests/system-dtrace-tests.mf
index 30e91fbb41..fdab627515 100644
--- a/usr/src/pkg/manifests/system-dtrace-tests.mf
+++ b/usr/src/pkg/manifests/system-dtrace-tests.mf
@@ -1366,10 +1366,14 @@ file path=opt/SUNWdtrt/tst/common/printf/tst.widths.d.out mode=0444
file path=opt/SUNWdtrt/tst/common/printf/tst.widths1.d mode=0444
file path=opt/SUNWdtrt/tst/common/printf/tst.wp.d mode=0444
file path=opt/SUNWdtrt/tst/common/printf/tst.wp.d.out mode=0444
+file path=opt/SUNWdtrt/tst/common/privs/tst.fds.ksh mode=0444
file path=opt/SUNWdtrt/tst/common/privs/tst.func_access.ksh mode=0444
+file path=opt/SUNWdtrt/tst/common/privs/tst.getf.ksh mode=0444
file path=opt/SUNWdtrt/tst/common/privs/tst.noprivdrop.ksh mode=0444
file path=opt/SUNWdtrt/tst/common/privs/tst.noprivrestrict.ksh mode=0444
file path=opt/SUNWdtrt/tst/common/privs/tst.op_access.ksh mode=0444
+file path=opt/SUNWdtrt/tst/common/privs/tst.procpriv.ksh mode=0444
+file path=opt/SUNWdtrt/tst/common/privs/tst.providers.ksh mode=0444
file path=opt/SUNWdtrt/tst/common/privs/tst.tick.ksh mode=0444
file path=opt/SUNWdtrt/tst/common/privs/tst.unpriv_funcs.ksh mode=0444
file path=opt/SUNWdtrt/tst/common/probes/err.D_PDESC_ZERO.probeqtn.d mode=0444
diff --git a/usr/src/uts/common/dtrace/dtrace.c b/usr/src/uts/common/dtrace/dtrace.c
index c3b3311e0b..d4d3d9c86e 100644
--- a/usr/src/uts/common/dtrace/dtrace.c
+++ b/usr/src/uts/common/dtrace/dtrace.c
@@ -171,6 +171,7 @@ static dtrace_provider_t *dtrace_provider; /* provider list */
static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
static int dtrace_opens; /* number of opens */
static int dtrace_helpers; /* number of helpers */
+static int dtrace_getf; /* number of unpriv getf()s */
static void *dtrace_softstate; /* softstate pointer */
static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
@@ -373,8 +374,8 @@ static kmutex_t dtrace_errlock;
* disallow all negative sizes. Ranges of size 0 are allowed.
*/
#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
- ((testaddr) - (baseaddr) < (basesz) && \
- (testaddr) + (testsz) - (baseaddr) <= (basesz) && \
+ ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
+ (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
(testaddr) + (testsz) >= (testaddr))
/*
@@ -475,6 +476,8 @@ static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
+static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
+static void dtrace_getf_barrier(void);
/*
* DTrace Probe Context Functions
@@ -619,7 +622,7 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
* up both thread-local variables and any global dynamically-allocated
* variables.
*/
- if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
+ if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
vstate->dtvs_dynvars.dtds_size)) {
dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
uintptr_t base = (uintptr_t)dstate->dtds_base +
@@ -686,6 +689,7 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
dtrace_vstate_t *vstate)
{
volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
+ file_t *fp;
/*
* If we hold the privilege to read from kernel memory, then
@@ -703,10 +707,99 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
/*
* We're allowed to read from our own string table.
*/
- if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
+ if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
mstate->dtms_difo->dtdo_strlen))
return (1);
+ if (vstate->dtvs_state != NULL &&
+ dtrace_priv_proc(vstate->dtvs_state, mstate)) {
+ proc_t *p;
+
+ /*
+ * When we have privileges to the current process, there are
+ * several context-related kernel structures that are safe to
+ * read, even absent the privilege to read from kernel memory.
+ * These reads are safe because these structures contain only
+ * state that (1) we're permitted to read, (2) is harmless or
+ * (3) contains pointers to additional kernel state that we're
+ * not permitted to read (and as such, do not present an
+ * opportunity for privilege escalation). Finally (and
+ * critically), because of the nature of their relation with
+ * the current thread context, the memory associated with these
+ * structures cannot change over the duration of probe context,
+ * and it is therefore impossible for this memory to be
+ * deallocated and reallocated as something else while it's
+ * being operated upon.
+ */
+ if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
+ return (1);
+
+ if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
+ sz, curthread->t_procp, sizeof (proc_t))) {
+ return (1);
+ }
+
+ if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
+ curthread->t_cred, sizeof (cred_t))) {
+ return (1);
+ }
+
+ if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
+ &(p->p_pidp->pid_id), sizeof (pid_t))) {
+ return (1);
+ }
+
+ if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
+ curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
+ return (1);
+ }
+ }
+
+ if ((fp = mstate->dtms_getf) != NULL) {
+ uintptr_t psz = sizeof (void *);
+ vnode_t *vp;
+ vnodeops_t *op;
+
+ /*
+ * When getf() returns a file_t, the enabling is implicitly
+ * granted the (transient) right to read the returned file_t
+ * as well as the v_path and v_op->vnop_name of the underlying
+ * vnode. These accesses are allowed after a successful
+ * getf() because the members that they refer to cannot change
+ * once set -- and the barrier logic in the kernel's closef()
+ * path assures that the file_t and its referenced vode_t
+ * cannot themselves be stale (that is, it impossible for
+ * either dtms_getf itself or its f_vnode member to reference
+ * freed memory).
+ */
+ if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
+ return (1);
+
+ if ((vp = fp->f_vnode) != NULL) {
+ if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
+ return (1);
+
+ if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
+ vp->v_path, strlen(vp->v_path) + 1)) {
+ return (1);
+ }
+
+ if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
+ return (1);
+
+ if ((op = vp->v_op) != NULL &&
+ DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
+ return (1);
+ }
+
+ if (op != NULL && op->vnop_name != NULL &&
+ DTRACE_INRANGE(addr, sz, op->vnop_name,
+ strlen(op->vnop_name) + 1)) {
+ return (1);
+ }
+ }
+ }
+
DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
*illval = addr;
return (0);
@@ -1085,8 +1178,7 @@ dtrace_priv_proc_common_zone(dtrace_state_t *state)
*/
ASSERT(s_cr != NULL);
- if ((cr = CRED()) != NULL &&
- s_cr->cr_zone == cr->cr_zone)
+ if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
return (1);
return (0);
@@ -1209,19 +1301,17 @@ dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
mode = pops->dtps_mode(prov->dtpv_arg,
probe->dtpr_id, probe->dtpr_arg);
- ASSERT((mode & DTRACE_MODE_USER) ||
- (mode & DTRACE_MODE_KERNEL));
- ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
- (mode & DTRACE_MODE_NOPRIV_DROP));
+ ASSERT(mode & (DTRACE_MODE_USER | DTRACE_MODE_KERNEL));
+ ASSERT(mode & (DTRACE_MODE_NOPRIV_RESTRICT |
+ DTRACE_MODE_NOPRIV_DROP));
}
/*
* If the dte_cond bits indicate that this consumer is only allowed to
- * see user-mode firings of this probe, call the provider's dtps_mode()
- * entry point to check that the probe was fired while in a user
- * context. If that's not the case, use the policy specified by the
- * provider to determine if we drop the probe or merely restrict
- * operation.
+ * see user-mode firings of this probe, check that the probe was fired
+ * while in a user context. If that's not the case, use the policy
+ * specified by the provider to determine if we drop the probe or
+ * merely restrict operation.
*/
if (ecb->dte_cond & DTRACE_COND_USERMODE) {
ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
@@ -1288,6 +1378,15 @@ dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
}
}
+ /*
+ * By merits of being in this code path at all, we have limited
+ * privileges. If the provider has indicated that limited privileges
+ * are to denote restricted operation, strip off the ability to access
+ * arguments.
+ */
+ if (mode & DTRACE_MODE_LIMITEDPRIV_RESTRICT)
+ mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
+
return (1);
}
@@ -2924,7 +3023,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
}
case DIF_VAR_CURTHREAD:
- if (!dtrace_priv_kernel(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
return ((uint64_t)(uintptr_t)curthread);
@@ -4452,11 +4551,35 @@ case DIF_SUBR_GETMAJOR:
break;
}
+ case DIF_SUBR_GETF: {
+ uintptr_t fd = tupregs[0].dttk_value;
+ uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
+ file_t *fp;
+
+ if (!dtrace_priv_proc(state, mstate)) {
+ regs[rd] = NULL;
+ break;
+ }
+
+ /*
+ * This is safe because fi_nfiles only increases, and the
+ * fi_list array is not freed when the array size doubles.
+ * (See the comment in flist_grow() for details on the
+ * management of the u_finfo structure.)
+ */
+ fp = fd < finfo->fi_nfiles ? finfo->fi_list[fd].uf_file : NULL;
+
+ mstate->dtms_getf = fp;
+ regs[rd] = (uintptr_t)fp;
+ break;
+ }
+
case DIF_SUBR_CLEANPATH: {
char *dest = (char *)mstate->dtms_scratch_ptr, c;
uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
uintptr_t src = tupregs[0].dttk_value;
int i = 0, j = 0;
+ zone_t *z;
if (!dtrace_strcanload(src, size, mstate, vstate)) {
regs[rd] = NULL;
@@ -4555,6 +4678,23 @@ next:
} while (c != '\0');
dest[j] = '\0';
+
+ if (mstate->dtms_getf != NULL &&
+ !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
+ (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
+ /*
+ * If we've done a getf() as a part of this ECB and we
+ * don't have kernel access (and we're not in the global
+ * zone), check if the path we cleaned up begins with
+ * the zone's root path, and trim it off if so. Note
+ * that this is an output cleanliness issue, not a
+ * security issue: knowing one's zone root path does
+ * not enable privilege escalation.
+ */
+ if (strstr(dest, z->zone_rootpath) == dest)
+ dest += strlen(z->zone_rootpath) - 1;
+ }
+
regs[rd] = (uintptr_t)dest;
mstate->dtms_scratch_ptr += size;
break;
@@ -4939,71 +5079,50 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
pc = DIF_INSTR_LABEL(instr);
break;
case DIF_OP_RLDSB:
- if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 1, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDSB:
regs[rd] = (int8_t)dtrace_load8(regs[r1]);
break;
case DIF_OP_RLDSH:
- if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 2, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDSH:
regs[rd] = (int16_t)dtrace_load16(regs[r1]);
break;
case DIF_OP_RLDSW:
- if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 4, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDSW:
regs[rd] = (int32_t)dtrace_load32(regs[r1]);
break;
case DIF_OP_RLDUB:
- if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 1, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDUB:
regs[rd] = dtrace_load8(regs[r1]);
break;
case DIF_OP_RLDUH:
- if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 2, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDUH:
regs[rd] = dtrace_load16(regs[r1]);
break;
case DIF_OP_RLDUW:
- if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 4, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDUW:
regs[rd] = dtrace_load32(regs[r1]);
break;
case DIF_OP_RLDX:
- if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 8, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDX:
regs[rd] = dtrace_load64(regs[r1]);
@@ -5942,6 +6061,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
+ mstate.dtms_getf = NULL;
+
*flags &= ~CPU_DTRACE_ERROR;
if (prov == dtrace_provider) {
@@ -8448,6 +8569,20 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
subr == DIF_SUBR_COPYOUTSTR) {
dp->dtdo_destructive = 1;
}
+
+ if (subr == DIF_SUBR_GETF) {
+ /*
+ * If we have a getf() we need to record that
+ * in our state. Note that our state can be
+ * NULL if this is a helper -- but in that
+ * case, the call to getf() is itself illegal,
+ * and will be caught (slightly later) when
+ * the helper is validated.
+ */
+ if (vstate->dtvs_state != NULL)
+ vstate->dtvs_state->dts_getf++;
+ }
+
break;
case DIF_OP_PUSHTR:
if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
@@ -13090,6 +13225,22 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
state->dts_activity = DTRACE_ACTIVITY_WARMUP;
+ if (state->dts_getf != 0 &&
+ !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
+ /*
+ * We don't have kernel privs but we have at least one call
+ * to getf(); we need to bump our zone's count, and (if
+ * this is the first enabling to have an unprivileged call
+ * to getf()) we need to hook into closef().
+ */
+ state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
+
+ if (dtrace_getf++ == 0) {
+ ASSERT(dtrace_closef == NULL);
+ dtrace_closef = dtrace_getf_barrier;
+ }
+ }
+
/*
* Now it's time to actually fire the BEGIN probe. We need to disable
* interrupts here both to record the CPU on which we fired the BEGIN
@@ -13206,6 +13357,24 @@ dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
state->dts_activity = DTRACE_ACTIVITY_STOPPED;
dtrace_sync();
+ if (state->dts_getf != 0 &&
+ !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
+ /*
+ * We don't have kernel privs but we have at least one call
+ * to getf(); we need to lower our zone's count, and (if
+ * this is the last enabling to have an unprivileged call
+ * to getf()) we need to clear the closef() hook.
+ */
+ ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
+ ASSERT(dtrace_closef == dtrace_getf_barrier);
+ ASSERT(dtrace_getf > 0);
+
+ state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
+
+ if (--dtrace_getf == 0)
+ dtrace_closef = NULL;
+ }
+
return (0);
}
@@ -14766,6 +14935,23 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
dtrace_toxranges++;
}
+static void
+dtrace_getf_barrier()
+{
+ /*
+ * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
+ * that contain calls to getf(), this routine will be called on every
+ * closef() before either the underlying vnode is released or the
+ * file_t itself is freed. By the time we are here, it is essential
+ * that the file_t can no longer be accessed from a call to getf()
+ * in probe context -- that assures that a dtrace_sync() can be used
+ * to clear out any enablings referring to the old structures.
+ */
+ if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
+ kcred->cr_zone->zone_dtrace_getf != 0)
+ dtrace_sync();
+}
+
/*
* DTrace Driver Cookbook Functions
*/
@@ -15922,6 +16108,9 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
dtrace_modload = NULL;
dtrace_modunload = NULL;
+ ASSERT(dtrace_getf == 0);
+ ASSERT(dtrace_closef == NULL);
+
mutex_exit(&cpu_lock);
if (dtrace_helptrace_enabled) {
diff --git a/usr/src/uts/common/dtrace/sdt_subr.c b/usr/src/uts/common/dtrace/sdt_subr.c
index 242185071b..157acc25fc 100644
--- a/usr/src/uts/common/dtrace/sdt_subr.c
+++ b/usr/src/uts/common/dtrace/sdt_subr.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#include <sys/sdt_impl.h>
@@ -97,26 +98,26 @@ static dtrace_pattr_t iscsi_attr = {
};
sdt_provider_t sdt_providers[] = {
- { "vtrace", "__vtrace_", &vtrace_attr, 0 },
- { "sysinfo", "__cpu_sysinfo_", &info_attr, 0 },
- { "vminfo", "__cpu_vminfo_", &info_attr, 0 },
- { "fpuinfo", "__fpuinfo_", &fpu_attr, 0 },
- { "sched", "__sched_", &stab_attr, 0 },
- { "proc", "__proc_", &stab_attr, 0 },
- { "io", "__io_", &stab_attr, 0 },
- { "ip", "__ip_", &stab_attr, 0 },
- { "tcp", "__tcp_", &stab_attr, 0 },
- { "udp", "__udp_", &stab_attr, 0 },
- { "mib", "__mib_", &stab_attr, 0 },
- { "fsinfo", "__fsinfo_", &fsinfo_attr, 0 },
- { "iscsi", "__iscsi_", &iscsi_attr, 0 },
- { "nfsv3", "__nfsv3_", &stab_attr, 0 },
- { "nfsv4", "__nfsv4_", &stab_attr, 0 },
- { "xpv", "__xpv_", &xpv_attr, 0 },
- { "fc", "__fc_", &fc_attr, 0 },
- { "srp", "__srp_", &fc_attr, 0 },
- { "sysevent", "__sysevent_", &stab_attr, 0 },
- { "sdt", NULL, &sdt_attr, 0 },
+ { "vtrace", "__vtrace_", &vtrace_attr },
+ { "sysinfo", "__cpu_sysinfo_", &info_attr, DTRACE_PRIV_USER },
+ { "vminfo", "__cpu_vminfo_", &info_attr, DTRACE_PRIV_USER },
+ { "fpuinfo", "__fpuinfo_", &fpu_attr },
+ { "sched", "__sched_", &stab_attr, DTRACE_PRIV_USER },
+ { "proc", "__proc_", &stab_attr, DTRACE_PRIV_USER },
+ { "io", "__io_", &stab_attr },
+ { "ip", "__ip_", &stab_attr },
+ { "tcp", "__tcp_", &stab_attr },
+ { "udp", "__udp_", &stab_attr },
+ { "mib", "__mib_", &stab_attr },
+ { "fsinfo", "__fsinfo_", &fsinfo_attr },
+ { "iscsi", "__iscsi_", &iscsi_attr },
+ { "nfsv3", "__nfsv3_", &stab_attr },
+ { "nfsv4", "__nfsv4_", &stab_attr },
+ { "xpv", "__xpv_", &xpv_attr },
+ { "fc", "__fc_", &fc_attr },
+ { "srp", "__srp_", &fc_attr },
+ { "sysevent", "__sysevent_", &stab_attr },
+ { "sdt", NULL, &sdt_attr },
{ NULL }
};
@@ -1155,6 +1156,20 @@ sdt_argdesc_t sdt_args[] = {
};
/*ARGSUSED*/
+int
+sdt_mode(void *arg, dtrace_id_t id, void *parg)
+{
+ /*
+ * We tell DTrace that we're in kernel mode, that the firing needs to
+ * be dropped for anything that doesn't have necessary privileges, and
+ * that it needs to be restricted for anything that has restricted
+ * (i.e., not all-zone) privileges.
+ */
+ return (DTRACE_MODE_KERNEL | DTRACE_MODE_NOPRIV_DROP |
+ DTRACE_MODE_LIMITEDPRIV_RESTRICT);
+}
+
+/*ARGSUSED*/
void
sdt_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc)
{
diff --git a/usr/src/uts/common/os/dtrace_subr.c b/usr/src/uts/common/os/dtrace_subr.c
index f2a9ac1b7d..d2ce3361c1 100644
--- a/usr/src/uts/common/os/dtrace_subr.c
+++ b/usr/src/uts/common/os/dtrace_subr.c
@@ -44,6 +44,7 @@ void (*dtrace_helpers_fork)(proc_t *, proc_t *);
void (*dtrace_cpustart_init)(void);
void (*dtrace_cpustart_fini)(void);
void (*dtrace_cpc_fire)(uint64_t);
+void (*dtrace_closef)(void);
void (*dtrace_debugger_init)(void);
void (*dtrace_debugger_fini)(void);
diff --git a/usr/src/uts/common/os/fio.c b/usr/src/uts/common/os/fio.c
index a014d25c0f..3b47e05ef2 100644
--- a/usr/src/uts/common/os/fio.c
+++ b/usr/src/uts/common/os/fio.c
@@ -21,6 +21,7 @@
/*
* Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, Joyent Inc. All rights reserved.
*/
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
@@ -54,6 +55,7 @@
#include <sys/poll.h>
#include <sys/rctl.h>
#include <sys/port_impl.h>
+#include <sys/dtrace.h>
#include <c2/audit.h>
#include <sys/nbmlock.h>
@@ -952,6 +954,18 @@ closef(file_t *fp)
ASSERT(fp->f_count == 0);
mutex_exit(&fp->f_tlock);
+ /*
+ * If DTrace has getf() subroutines active, it will set dtrace_closef
+ * to point to code that implements a barrier with respect to probe
+ * context. This must be called before the file_t is freed (and the
+ * vnode that it refers to is released) -- but it must be after the
+ * file_t has been removed from the uf_entry_t. That is, there must
+ * be no way for a racing getf() in probe context to yield the fp that
+ * we're operating upon.
+ */
+ if (dtrace_closef != NULL)
+ (*dtrace_closef)();
+
VN_RELE(vp);
/*
* deallocate resources to audit_data
diff --git a/usr/src/uts/common/sys/dtrace.h b/usr/src/uts/common/sys/dtrace.h
index fd7612f88a..5da41b409b 100644
--- a/usr/src/uts/common/sys/dtrace.h
+++ b/usr/src/uts/common/sys/dtrace.h
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@@ -288,8 +288,9 @@ typedef enum dtrace_probespec {
#define DIF_SUBR_INET_NTOA6 43
#define DIF_SUBR_TOUPPER 44
#define DIF_SUBR_TOLOWER 45
+#define DIF_SUBR_GETF 46
-#define DIF_SUBR_MAX 45 /* max subroutine value */
+#define DIF_SUBR_MAX 46 /* max subroutine value */
typedef uint32_t dif_instr_t;
@@ -1649,13 +1650,20 @@ typedef struct dof_helper {
*
* A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL
* or DTRACE_MODE_USER) and the policy when the privilege of the enabling
- * is insufficient for that mode (either DTRACE_MODE_NOPRIV_DROP or
- * DTRACE_MODE_NOPRIV_RESTRICT). If the policy is DTRACE_MODE_NOPRIV_DROP,
- * insufficient privilege will result in the probe firing being silently
- * ignored for the enabling; if the policy is DTRACE_NODE_NOPRIV_RESTRICT,
- * insufficient privilege will not prevent probe processing for the
- * enabling, but restrictions will be in place that induce a UPRIV fault
- * upon attempt to examine probe arguments or current process state.
+ * is insufficient for that mode (a combination of DTRACE_MODE_NOPRIV_DROP,
+ * DTRACE_MODE_NOPRIV_RESTRICT, and DTRACE_MODE_LIMITEDPRIV_RESTRICT). If
+ * DTRACE_MODE_NOPRIV_DROP bit is set, insufficient privilege will result
+ * in the probe firing being silently ignored for the enabling; if the
+ * DTRACE_NODE_NOPRIV_RESTRICT bit is set, insufficient privilege will not
+ * prevent probe processing for the enabling, but restrictions will be in
+ * place that induce a UPRIV fault upon attempt to examine probe arguments
+ * or current process state. If the DTRACE_MODE_LIMITEDPRIV_RESTRICT bit
+ * is set, similar restrictions will be placed upon operation if the
+ * privilege is sufficient to process the enabling, but does not otherwise
+ * entitle the enabling to all zones. The DTRACE_MODE_NOPRIV_DROP and
+ * DTRACE_MODE_NOPRIV_RESTRICT are mutually exclusive (and one of these
+ * two policies must be specified), but either may be combined (or not)
+ * with DTRACE_MODE_LIMITEDPRIV_RESTRICT.
*
* 1.10.4 Caller's context
*
@@ -2054,6 +2062,7 @@ typedef struct dtrace_pops {
#define DTRACE_MODE_USER 0x02
#define DTRACE_MODE_NOPRIV_DROP 0x10
#define DTRACE_MODE_NOPRIV_RESTRICT 0x20
+#define DTRACE_MODE_LIMITEDPRIV_RESTRICT 0x40
typedef uintptr_t dtrace_provider_id_t;
@@ -2268,6 +2277,7 @@ extern void (*dtrace_helpers_cleanup)();
extern void (*dtrace_helpers_fork)(proc_t *parent, proc_t *child);
extern void (*dtrace_cpustart_init)();
extern void (*dtrace_cpustart_fini)();
+extern void (*dtrace_closef)();
extern void (*dtrace_debugger_init)();
extern void (*dtrace_debugger_fini)();
diff --git a/usr/src/uts/common/sys/dtrace_impl.h b/usr/src/uts/common/sys/dtrace_impl.h
index d780082137..f79bf1e42e 100644
--- a/usr/src/uts/common/sys/dtrace_impl.h
+++ b/usr/src/uts/common/sys/dtrace_impl.h
@@ -924,6 +924,7 @@ typedef struct dtrace_mstate {
uintptr_t dtms_strtok; /* saved strtok() pointer */
uint32_t dtms_access; /* memory access rights */
dtrace_difo_t *dtms_difo; /* current dif object */
+ file_t *dtms_getf; /* cached rval of getf() */
} dtrace_mstate_t;
#define DTRACE_COND_OWNER 0x1
@@ -1144,6 +1145,7 @@ struct dtrace_state {
dtrace_optval_t dts_options[DTRACEOPT_MAX]; /* options */
dtrace_cred_t dts_cred; /* credentials */
size_t dts_nretained; /* number of retained enabs */
+ int dts_getf; /* number of getf() calls */
};
struct dtrace_provider {
diff --git a/usr/src/uts/common/sys/sdt_impl.h b/usr/src/uts/common/sys/sdt_impl.h
index cbe95f7c66..f7cc683f2f 100644
--- a/usr/src/uts/common/sys/sdt_impl.h
+++ b/usr/src/uts/common/sys/sdt_impl.h
@@ -24,11 +24,13 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
+
#ifndef _SYS_SDT_IMPL_H
#define _SYS_SDT_IMPL_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -45,6 +47,7 @@ typedef struct sdt_provider {
char *sdtp_name; /* name of provider */
char *sdtp_prefix; /* prefix for probe names */
dtrace_pattr_t *sdtp_attr; /* stability attributes */
+ uint32_t sdtp_priv; /* privilege, if any */
dtrace_provider_id_t sdtp_id; /* provider ID */
} sdt_provider_t;
@@ -75,6 +78,7 @@ typedef struct sdt_argdesc {
} sdt_argdesc_t;
extern void sdt_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *);
+extern int sdt_mode(void *, dtrace_id_t, void *);
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index cc2a9f0e24..4419144f2b 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -538,6 +538,11 @@ typedef struct zone {
rctl_qty_t zone_nprocs_ctl; /* current limit protected by */
/* zone_rctls->rcs_lock */
kstat_t *zone_nprocs_kstat;
+
+ /*
+ * DTrace-private per-zone state
+ */
+ int zone_dtrace_getf; /* # of unprivileged getf()s */
} zone_t;
/*
diff --git a/usr/src/uts/intel/dtrace/sdt.c b/usr/src/uts/intel/dtrace/sdt.c
index 38be2233b1..27cb19e06c 100644
--- a/usr/src/uts/intel/dtrace/sdt.c
+++ b/usr/src/uts/intel/dtrace/sdt.c
@@ -23,6 +23,9 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
#include <sys/modctl.h>
#include <sys/sunddi.h>
@@ -419,9 +422,19 @@ sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
dtrace_invop_add(sdt_invop);
for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
+ uint32_t priv;
+
+ if (prov->sdtp_priv == DTRACE_PRIV_NONE) {
+ priv = DTRACE_PRIV_KERNEL;
+ sdt_pops.dtps_mode = NULL;
+ } else {
+ priv = prov->sdtp_priv;
+ ASSERT(priv == DTRACE_PRIV_USER);
+ sdt_pops.dtps_mode = sdt_mode;
+ }
+
if (dtrace_register(prov->sdtp_name, prov->sdtp_attr,
- DTRACE_PRIV_KERNEL, NULL,
- &sdt_pops, prov, &prov->sdtp_id) != 0) {
+ priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) {
cmn_err(CE_WARN, "failed to register sdt provider %s",
prov->sdtp_name);
}
diff --git a/usr/src/uts/sparc/dtrace/sdt.c b/usr/src/uts/sparc/dtrace/sdt.c
index af9ea8e155..8fbdf24fee 100644
--- a/usr/src/uts/sparc/dtrace/sdt.c
+++ b/usr/src/uts/sparc/dtrace/sdt.c
@@ -23,6 +23,9 @@
* Use is subject to license terms.
*/
+/*
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ */
#include <sys/modctl.h>
#include <sys/sunddi.h>
@@ -373,9 +376,19 @@ sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
sdt_devi = devi;
for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
+ uint32_t priv;
+
+ if (prov->sdtp_priv == DTRACE_PRIV_NONE) {
+ priv = DTRACE_PRIV_KERNEL;
+ sdt_pops.dtps_mode = NULL;
+ } else {
+ priv = prov->sdtp_priv;
+ ASSERT(priv == DTRACE_PRIV_USER);
+ sdt_pops.dtps_mode = sdt_mode;
+ }
+
if (dtrace_register(prov->sdtp_name, prov->sdtp_attr,
- DTRACE_PRIV_KERNEL, NULL,
- &sdt_pops, prov, &prov->sdtp_id) != 0) {
+ priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) {
cmn_err(CE_WARN, "failed to register sdt provider %s",
prov->sdtp_name);
}