Diffstat (limited to 'usr/src/uts/common/dtrace/dtrace.c')
-rw-r--r--  usr/src/uts/common/dtrace/dtrace.c  410
1 file changed, 324 insertions(+), 86 deletions(-)
diff --git a/usr/src/uts/common/dtrace/dtrace.c b/usr/src/uts/common/dtrace/dtrace.c
index 5013661588..8ef84d1322 100644
--- a/usr/src/uts/common/dtrace/dtrace.c
+++ b/usr/src/uts/common/dtrace/dtrace.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@@ -116,7 +116,7 @@
int dtrace_destructive_disallow = 0;
dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t dtrace_difo_maxsize = (256 * 1024);
-dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
+dtrace_optval_t dtrace_dof_maxsize = (8 * 1024 * 1024);
size_t dtrace_global_maxsize = (16 * 1024);
size_t dtrace_actions_max = (16 * 1024);
size_t dtrace_retain_max = 1024;
@@ -171,6 +171,7 @@ static dtrace_provider_t *dtrace_provider; /* provider list */
static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
static int dtrace_opens; /* number of opens */
static int dtrace_helpers; /* number of helpers */
+static int dtrace_getf; /* number of unpriv getf()s */
static void *dtrace_softstate; /* softstate pointer */
static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
@@ -267,17 +268,22 @@ dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
/*
* DTrace Helper Tracing Variables
+ *
+ * These variables should be set dynamically to enable helper tracing. The
+ * only variables that should be set are dtrace_helptrace_enable (which should
+ * be set to a non-zero value to allocate helper tracing buffers on the next
+ * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a
+ * non-zero value to deallocate helper tracing buffers on the next close of
+ * /dev/dtrace). When (and only when) helper tracing is disabled, the
+ * buffer size may also be set via dtrace_helptrace_bufsize.
*/
-uint32_t dtrace_helptrace_next = 0;
-uint32_t dtrace_helptrace_nlocals;
-char *dtrace_helptrace_buffer;
-int dtrace_helptrace_bufsize = 512 * 1024;
-
-#ifdef DEBUG
-int dtrace_helptrace_enabled = 1;
-#else
-int dtrace_helptrace_enabled = 0;
-#endif
+int dtrace_helptrace_enable = 0;
+int dtrace_helptrace_disable = 0;
+int dtrace_helptrace_bufsize = 16 * 1024 * 1024;
+uint32_t dtrace_helptrace_nlocals;
+static dtrace_helptrace_t *dtrace_helptrace_buffer;
+static uint32_t dtrace_helptrace_next = 0;
+static int dtrace_helptrace_wrapped = 0;
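A usage note on the variables above: with this change, helper tracing is toggled at run time rather than compiled in under DEBUG. An administrator would, for example, set dtrace_helptrace_enable to a non-zero value with a kernel debugger -- something along the lines of 'dtrace_helptrace_enable/W 1' under mdb -kw (an illustrative sketch, not part of this change) -- and then open /dev/dtrace to have the buffer allocated.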
/*
* DTrace Error Hashing
@@ -373,8 +379,8 @@ static kmutex_t dtrace_errlock;
* disallow all negative sizes. Ranges of size 0 are allowed.
*/
#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
- ((testaddr) - (baseaddr) < (basesz) && \
- (testaddr) + (testsz) - (baseaddr) <= (basesz) && \
+ ((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
+ (testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
(testaddr) + (testsz) >= (testaddr))
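The change above only adds casts on baseaddr; the shape of the range test itself is unchanged. As a rough user-land model of why the third clause is needed (a sketch with hypothetical names, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    /* Model of DTRACE_INRANGE: every operand is unsigned. */
    static int
    inrange(uintptr_t testaddr, size_t testsz, uintptr_t baseaddr, size_t basesz)
    {
            return (testaddr - baseaddr < basesz &&
                testaddr + testsz - baseaddr <= basesz &&
                testaddr + testsz >= testaddr);         /* reject wraparound */
    }

    int
    main(void)
    {
            /* A 16-byte window at 0x1000. */
            (void) printf("%d\n", inrange(0x1008, 8, 0x1000, 16));  /* 1: fits */
            (void) printf("%d\n", inrange(0x0ff8, 8, 0x1000, 16));  /* 0: below base */

            /*
             * A "negative" (huge unsigned) size wraps testaddr + testsz around;
             * only the third clause rejects this case.
             */
            (void) printf("%d\n", inrange(0x1008, (size_t)-4, 0x1000, 16)); /* 0 */
            return (0);
    }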
/*
@@ -475,6 +481,8 @@ static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
+static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
+static void dtrace_getf_barrier(void);
/*
* DTrace Probe Context Functions
@@ -619,7 +627,7 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
* up both thread-local variables and any global dynamically-allocated
* variables.
*/
- if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
+ if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
vstate->dtvs_dynvars.dtds_size)) {
dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
uintptr_t base = (uintptr_t)dstate->dtds_base +
@@ -686,6 +694,7 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
dtrace_vstate_t *vstate)
{
volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
+ file_t *fp;
/*
* If we hold the privilege to read from kernel memory, then
@@ -703,10 +712,99 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
/*
* We're allowed to read from our own string table.
*/
- if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
+ if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
mstate->dtms_difo->dtdo_strlen))
return (1);
+ if (vstate->dtvs_state != NULL &&
+ dtrace_priv_proc(vstate->dtvs_state, mstate)) {
+ proc_t *p;
+
+ /*
+ * When we have privileges to the current process, there are
+ * several context-related kernel structures that are safe to
+ * read, even absent the privilege to read from kernel memory.
+ * These reads are safe because these structures contain only
+ * state that (1) we're permitted to read, (2) is harmless or
+ * (3) contains pointers to additional kernel state that we're
+ * not permitted to read (and as such, do not present an
+ * opportunity for privilege escalation). Finally (and
+ * critically), because of the nature of their relation with
+ * the current thread context, the memory associated with these
+ * structures cannot change over the duration of probe context,
+ * and it is therefore impossible for this memory to be
+ * deallocated and reallocated as something else while it's
+ * being operated upon.
+ */
+ if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
+ return (1);
+
+ if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
+ sz, curthread->t_procp, sizeof (proc_t))) {
+ return (1);
+ }
+
+ if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
+ curthread->t_cred, sizeof (cred_t))) {
+ return (1);
+ }
+
+ if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
+ &(p->p_pidp->pid_id), sizeof (pid_t))) {
+ return (1);
+ }
+
+ if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
+ curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
+ return (1);
+ }
+ }
+
+ if ((fp = mstate->dtms_getf) != NULL) {
+ uintptr_t psz = sizeof (void *);
+ vnode_t *vp;
+ vnodeops_t *op;
+
+ /*
+ * When getf() returns a file_t, the enabling is implicitly
+ * granted the (transient) right to read the returned file_t
+ * as well as the v_path and v_op->vnop_name of the underlying
+ * vnode. These accesses are allowed after a successful
+ * getf() because the members that they refer to cannot change
+ * once set -- and the barrier logic in the kernel's closef()
+ * path assures that the file_t and its referenced vnode_t
+ * cannot themselves be stale (that is, it is impossible for
+ * either dtms_getf itself or its f_vnode member to reference
+ * freed memory).
+ */
+ if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
+ return (1);
+
+ if ((vp = fp->f_vnode) != NULL) {
+ if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
+ return (1);
+
+ if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
+ vp->v_path, strlen(vp->v_path) + 1)) {
+ return (1);
+ }
+
+ if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
+ return (1);
+
+ if ((op = vp->v_op) != NULL &&
+ DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
+ return (1);
+ }
+
+ if (op != NULL && op->vnop_name != NULL &&
+ DTRACE_INRANGE(addr, sz, op->vnop_name,
+ strlen(op->vnop_name) + 1)) {
+ return (1);
+ }
+ }
+ }
+
DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
*illval = addr;
return (0);
@@ -746,7 +844,7 @@ static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
dtrace_vstate_t *vstate)
{
- size_t sz;
+ size_t sz, strsize;
ASSERT(type->dtdt_flags & DIF_TF_BYREF);
/*
@@ -756,11 +854,24 @@ dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
return (1);
- if (type->dtdt_kind == DIF_TYPE_STRING)
- sz = dtrace_strlen(src,
- vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
- else
+ if (type->dtdt_kind == DIF_TYPE_STRING) {
+ dtrace_state_t *state = vstate->dtvs_state;
+
+ if (state != NULL) {
+ strsize = state->dts_options[DTRACEOPT_STRSIZE];
+ } else {
+ /*
+ * In helper context, we have a NULL state; fall back
+ * to using the system-wide default for the string size
+ * in this case.
+ */
+ strsize = dtrace_strsize_default;
+ }
+
+ sz = dtrace_strlen(src, strsize) + 1;
+ } else {
sz = type->dtdt_size;
+ }
return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
}
@@ -1085,8 +1196,7 @@ dtrace_priv_proc_common_zone(dtrace_state_t *state)
*/
ASSERT(s_cr != NULL);
- if ((cr = CRED()) != NULL &&
- s_cr->cr_zone == cr->cr_zone)
+ if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
return (1);
return (0);
@@ -1209,19 +1319,17 @@ dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
mode = pops->dtps_mode(prov->dtpv_arg,
probe->dtpr_id, probe->dtpr_arg);
- ASSERT((mode & DTRACE_MODE_USER) ||
- (mode & DTRACE_MODE_KERNEL));
- ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
- (mode & DTRACE_MODE_NOPRIV_DROP));
+ ASSERT(mode & (DTRACE_MODE_USER | DTRACE_MODE_KERNEL));
+ ASSERT(mode & (DTRACE_MODE_NOPRIV_RESTRICT |
+ DTRACE_MODE_NOPRIV_DROP));
}
/*
* If the dte_cond bits indicate that this consumer is only allowed to
- * see user-mode firings of this probe, call the provider's dtps_mode()
- * entry point to check that the probe was fired while in a user
- * context. If that's not the case, use the policy specified by the
- * provider to determine if we drop the probe or merely restrict
- * operation.
+ * see user-mode firings of this probe, check that the probe was fired
+ * while in a user context. If that's not the case, use the policy
+ * specified by the provider to determine if we drop the probe or
+ * merely restrict operation.
*/
if (ecb->dte_cond & DTRACE_COND_USERMODE) {
ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
@@ -1288,6 +1396,15 @@ dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
}
}
+ /*
+ * By merits of being in this code path at all, we have limited
+ * privileges. If the provider has indicated that limited privileges
+ * are to denote restricted operation, strip off the ability to access
+ * arguments.
+ */
+ if (mode & DTRACE_MODE_LIMITEDPRIV_RESTRICT)
+ mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
+
return (1);
}
@@ -2924,7 +3041,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
}
case DIF_VAR_CURTHREAD:
- if (!dtrace_priv_kernel(state))
+ if (!dtrace_priv_proc(state, mstate))
return (0);
return ((uint64_t)(uintptr_t)curthread);
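Combined with the dtrace_canload() additions above, the practical effect is that a consumer holding only process-level privileges may now follow curthread into its own kthread_t; for instance, an enabling along the lines of 'syscall:::entry { @[curthread->t_pri] = count(); }' (an illustrative one-liner, not taken from this change) no longer requires kernel visibility.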
@@ -4452,11 +4569,35 @@ case DIF_SUBR_GETMAJOR:
break;
}
+ case DIF_SUBR_GETF: {
+ uintptr_t fd = tupregs[0].dttk_value;
+ uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
+ file_t *fp;
+
+ if (!dtrace_priv_proc(state, mstate)) {
+ regs[rd] = NULL;
+ break;
+ }
+
+ /*
+ * This is safe because fi_nfiles only increases, and the
+ * fi_list array is not freed when the array size doubles.
+ * (See the comment in flist_grow() for details on the
+ * management of the u_finfo structure.)
+ */
+ fp = fd < finfo->fi_nfiles ? finfo->fi_list[fd].uf_file : NULL;
+
+ mstate->dtms_getf = fp;
+ regs[rd] = (uintptr_t)fp;
+ break;
+ }
+
case DIF_SUBR_CLEANPATH: {
char *dest = (char *)mstate->dtms_scratch_ptr, c;
uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
uintptr_t src = tupregs[0].dttk_value;
int i = 0, j = 0;
+ zone_t *z;
if (!dtrace_strcanload(src, size, mstate, vstate)) {
regs[rd] = NULL;
@@ -4555,6 +4696,23 @@ next:
} while (c != '\0');
dest[j] = '\0';
+
+ if (mstate->dtms_getf != NULL &&
+ !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
+ (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
+ /*
+ * If we've done a getf() as a part of this ECB and we
+ * don't have kernel access (and we're not in the global
+ * zone), check if the path we cleaned up begins with
+ * the zone's root path, and trim it off if so. Note
+ * that this is an output cleanliness issue, not a
+ * security issue: knowing one's zone root path does
+ * not enable privilege escalation.
+ */
+ if (strstr(dest, z->zone_rootpath) == dest)
+ dest += strlen(z->zone_rootpath) - 1;
+ }
+
regs[rd] = (uintptr_t)dest;
mstate->dtms_scratch_ptr += size;
break;
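Taken together, getf() and the getf handling added to dtrace_canload() above are what allow an enabling with process privileges to do something like 'syscall::write:entry { trace(stringof(getf(arg0)->f_vnode->v_path)); }' (an illustrative one-liner -- a real script would guard against a NULL v_path); and when such an enabling calls cleanpath() from a non-global zone, the zone's root prefix is trimmed from the result as the comment above describes.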
@@ -4939,71 +5097,50 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
pc = DIF_INSTR_LABEL(instr);
break;
case DIF_OP_RLDSB:
- if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 1, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDSB:
regs[rd] = (int8_t)dtrace_load8(regs[r1]);
break;
case DIF_OP_RLDSH:
- if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 2, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDSH:
regs[rd] = (int16_t)dtrace_load16(regs[r1]);
break;
case DIF_OP_RLDSW:
- if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 4, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDSW:
regs[rd] = (int32_t)dtrace_load32(regs[r1]);
break;
case DIF_OP_RLDUB:
- if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 1, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDUB:
regs[rd] = dtrace_load8(regs[r1]);
break;
case DIF_OP_RLDUH:
- if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 2, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDUH:
regs[rd] = dtrace_load16(regs[r1]);
break;
case DIF_OP_RLDUW:
- if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 4, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDUW:
regs[rd] = dtrace_load32(regs[r1]);
break;
case DIF_OP_RLDX:
- if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
- *flags |= CPU_DTRACE_KPRIV;
- *illval = regs[r1];
+ if (!dtrace_canload(regs[r1], 8, mstate, vstate))
break;
- }
/*FALLTHROUGH*/
case DIF_OP_LDX:
regs[rd] = dtrace_load64(regs[r1]);
@@ -5940,6 +6077,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
+ mstate.dtms_getf = NULL;
+
*flags &= ~CPU_DTRACE_ERROR;
if (prov == dtrace_provider) {
@@ -6736,7 +6875,7 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
priv = DTRACE_PRIV_ALL;
} else {
*uidp = crgetuid(cr);
- *zoneidp = crgetzoneid(cr);
+ *zoneidp = crgetzonedid(cr);
priv = 0;
if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
@@ -7232,7 +7371,7 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
provider->dtpv_priv.dtpp_flags = priv;
if (cr != NULL) {
provider->dtpv_priv.dtpp_uid = crgetuid(cr);
- provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
+ provider->dtpv_priv.dtpp_zoneid = crgetzonedid(cr);
}
provider->dtpv_pops = *pops;
@@ -7843,6 +7982,7 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
uint32_t priv;
uid_t uid;
zoneid_t zoneid;
+ dtrace_state_t *state = enab->dten_vstate->dtvs_state;
ASSERT(MUTEX_HELD(&dtrace_lock));
dtrace_ecb_create_cache = NULL;
@@ -7857,8 +7997,22 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
}
dtrace_probekey(desc, &pkey);
- dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
- &priv, &uid, &zoneid);
+ dtrace_cred2priv(state->dts_cred.dcr_cred, &priv, &uid, &zoneid);
+
+ if ((priv & DTRACE_PRIV_ZONEOWNER) &&
+ state->dts_options[DTRACEOPT_ZONE] != DTRACEOPT_UNSET) {
+ /*
+ * If we have the privilege of instrumenting all zones but we
+ * have been told to instrument but one, we will spoof this up
+ * depriving ourselves of DTRACE_PRIV_ZONEOWNER for purposes
+ * of dtrace_match(). (Note that DTRACEOPT_ZONE is not for
+ * security but rather for performance: it allows the global
+ * zone to instrument USDT probes in a local zone without
+ * requiring all zones to be instrumented.)
+ */
+ priv &= ~DTRACE_PRIV_ZONEOWNER;
+ zoneid = state->dts_options[DTRACEOPT_ZONE];
+ }
return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
enab));
@@ -8443,6 +8597,20 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
subr == DIF_SUBR_COPYOUTSTR) {
dp->dtdo_destructive = 1;
}
+
+ if (subr == DIF_SUBR_GETF) {
+ /*
+ * If we have a getf() we need to record that
+ * in our state. Note that our state can be
+ * NULL if this is a helper -- but in that
+ * case, the call to getf() is itself illegal,
+ * and will be caught (slightly later) when
+ * the helper is validated.
+ */
+ if (vstate->dtvs_state != NULL)
+ vstate->dtvs_state->dts_getf++;
+ }
+
break;
case DIF_OP_PUSHTR:
if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
@@ -13085,6 +13253,22 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
state->dts_activity = DTRACE_ACTIVITY_WARMUP;
+ if (state->dts_getf != 0 &&
+ !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
+ /*
+ * We don't have kernel privs but we have at least one call
+ * to getf(); we need to bump our zone's count, and (if
+ * this is the first enabling to have an unprivileged call
+ * to getf()) we need to hook into closef().
+ */
+ state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
+
+ if (dtrace_getf++ == 0) {
+ ASSERT(dtrace_closef == NULL);
+ dtrace_closef = dtrace_getf_barrier;
+ }
+ }
+
/*
* Now it's time to actually fire the BEGIN probe. We need to disable
* interrupts here both to record the CPU on which we fired the BEGIN
@@ -13201,6 +13385,24 @@ dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
state->dts_activity = DTRACE_ACTIVITY_STOPPED;
dtrace_sync();
+ if (state->dts_getf != 0 &&
+ !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
+ /*
+ * We don't have kernel privs but we have at least one call
+ * to getf(); we need to lower our zone's count, and (if
+ * this is the last enabling to have an unprivileged call
+ * to getf()) we need to clear the closef() hook.
+ */
+ ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
+ ASSERT(dtrace_closef == dtrace_getf_barrier);
+ ASSERT(dtrace_getf > 0);
+
+ state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
+
+ if (--dtrace_getf == 0)
+ dtrace_closef = NULL;
+ }
+
return (0);
}
@@ -13507,10 +13709,10 @@ dtrace_helper_trace(dtrace_helper_action_t *helper,
dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
{
uint32_t size, next, nnext, i;
- dtrace_helptrace_t *ent;
+ dtrace_helptrace_t *ent, *buffer;
uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
- if (!dtrace_helptrace_enabled)
+ if ((buffer = dtrace_helptrace_buffer) == NULL)
return;
ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);
@@ -13538,10 +13740,12 @@ dtrace_helper_trace(dtrace_helper_action_t *helper,
/*
* We have our slot; fill it in.
*/
- if (nnext == size)
+ if (nnext == size) {
+ dtrace_helptrace_wrapped++;
next = 0;
+ }
- ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
+ ent = (dtrace_helptrace_t *)((uintptr_t)buffer + next);
ent->dtht_helper = helper;
ent->dtht_where = where;
ent->dtht_nlocals = vstate->dtvs_nlocals;
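The wrap handling above pairs with a compare-and-swap claim of the cursor; as a rough user-land model of that wrap-to-zero pattern (a sketch with hypothetical names, not the kernel implementation):

    #include <stdint.h>

    #define TRACE_BUFSIZE   (16 * 1024 * 1024)  /* mirrors dtrace_helptrace_bufsize */

    static uint32_t cursor;                     /* hypothetical cursor */
    static int wrapped;                         /* hypothetical wrap count */

    /*
     * Claim 'size' bytes in a wrapping buffer and return the offset of the
     * claimed record; when there is no room left, the claim wraps to offset 0.
     */
    static uint32_t
    claim(uint32_t size)
    {
            uint32_t next, nnext;

            do {
                    next = cursor;
                    nnext = (next + size < TRACE_BUFSIZE) ? next + size : size;
            } while (!__atomic_compare_exchange_n(&cursor, &next, nnext, 0,
                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));

            if (nnext == size) {        /* wrapped: the record lives at offset 0 */
                    wrapped++;
                    next = 0;
            }

            return (next);
    }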
@@ -13575,7 +13779,7 @@ dtrace_helper(int which, dtrace_mstate_t *mstate,
dtrace_helper_action_t *helper;
dtrace_vstate_t *vstate;
dtrace_difo_t *pred;
- int i, trace = dtrace_helptrace_enabled;
+ int i, trace = dtrace_helptrace_buffer != NULL;
ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
@@ -14761,6 +14965,23 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
dtrace_toxranges++;
}
+static void
+dtrace_getf_barrier()
+{
+ /*
+ * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
+ * that contain calls to getf(), this routine will be called on every
+ * closef() before either the underlying vnode is released or the
+ * file_t itself is freed. By the time we are here, it is essential
+ * that the file_t can no longer be accessed from a call to getf()
+ * in probe context -- that assures that a dtrace_sync() can be used
+ * to clear out any enablings referring to the old structures.
+ */
+ if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
+ kcred->cr_zone->zone_dtrace_getf != 0)
+ dtrace_sync();
+}
+
/*
* DTrace Driver Cookbook Functions
*/
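The counterpart to this barrier is a hook in the generic closef() path (the call site lives outside this file and is not shown in this diff); conceptually it amounts to something like

        if (dtrace_closef != NULL)
                (*dtrace_closef)();

immediately before the file_t or its vnode can be freed. Checking the per-zone zone_dtrace_getf counts here keeps the dtrace_sync() cost off the common close path when no unprivileged getf() enablings exist.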
@@ -14875,17 +15096,6 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
mutex_exit(&cpu_lock);
/*
- * If DTrace helper tracing is enabled, we need to allocate the
- * trace buffer and initialize the values.
- */
- if (dtrace_helptrace_enabled) {
- ASSERT(dtrace_helptrace_buffer == NULL);
- dtrace_helptrace_buffer =
- kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
- dtrace_helptrace_next = 0;
- }
-
- /*
* If there are already providers, we must ask them to provide their
* probes, and then match any anonymous enabling against them. Note
* that there should be no other retained enablings at this time:
@@ -14981,6 +15191,18 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
return (EBUSY);
}
+ if (dtrace_helptrace_enable && dtrace_helptrace_buffer == NULL) {
+ /*
+ * If DTrace helper tracing is enabled, we need to allocate the
+ * trace buffer and initialize the values.
+ */
+ dtrace_helptrace_buffer =
+ kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
+ dtrace_helptrace_next = 0;
+ dtrace_helptrace_wrapped = 0;
+ dtrace_helptrace_enable = 0;
+ }
+
state = dtrace_state_create(devp, cred_p);
mutex_exit(&cpu_lock);
@@ -15002,6 +15224,7 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
minor_t minor = getminor(dev);
dtrace_state_t *state;
+ dtrace_helptrace_t *buf = NULL;
if (minor == DTRACEMNRN_HELPER)
return (0);
@@ -15019,6 +15242,18 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
dtrace_state_destroy(state->dts_anon);
}
+ if (dtrace_helptrace_disable) {
+ /*
+ * If we have been told to disable helper tracing, set the
+ * buffer to NULL before calling into dtrace_state_destroy();
+ * we take advantage of its dtrace_sync() to know that no
+ * CPU is in probe context with enabled helper tracing
+ * after it returns.
+ */
+ buf = dtrace_helptrace_buffer;
+ dtrace_helptrace_buffer = NULL;
+ }
+
dtrace_state_destroy(state);
ASSERT(dtrace_opens > 0);
@@ -15029,6 +15264,11 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
+ if (buf != NULL) {
+ kmem_free(buf, dtrace_helptrace_bufsize);
+ dtrace_helptrace_disable = 0;
+ }
+
mutex_exit(&dtrace_lock);
mutex_exit(&cpu_lock);
@@ -15917,12 +16157,10 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
dtrace_modload = NULL;
dtrace_modunload = NULL;
- mutex_exit(&cpu_lock);
+ ASSERT(dtrace_getf == 0);
+ ASSERT(dtrace_closef == NULL);
- if (dtrace_helptrace_enabled) {
- kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
- dtrace_helptrace_buffer = NULL;
- }
+ mutex_exit(&cpu_lock);
kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
dtrace_probes = NULL;