Diffstat (limited to 'usr/src/uts/common/dtrace/dtrace.c')
-rw-r--r--  usr/src/uts/common/dtrace/dtrace.c  |  410
1 file changed, 324 insertions, 86 deletions
diff --git a/usr/src/uts/common/dtrace/dtrace.c b/usr/src/uts/common/dtrace/dtrace.c
index 5013661588..8ef84d1322 100644
--- a/usr/src/uts/common/dtrace/dtrace.c
+++ b/usr/src/uts/common/dtrace/dtrace.c
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  * Copyright (c) 2012 by Delphix. All rights reserved.
  */
 
@@ -116,7 +116,7 @@ int dtrace_destructive_disallow = 0;
 dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
 size_t dtrace_difo_maxsize = (256 * 1024);
-dtrace_optval_t dtrace_dof_maxsize = (256 * 1024);
+dtrace_optval_t dtrace_dof_maxsize = (8 * 1024 * 1024);
 size_t dtrace_global_maxsize = (16 * 1024);
 size_t dtrace_actions_max = (16 * 1024);
 size_t dtrace_retain_max = 1024;
@@ -171,6 +171,7 @@ static dtrace_provider_t *dtrace_provider; /* provider list */
 static dtrace_meta_t *dtrace_meta_pid;	/* user-land meta provider */
 static int dtrace_opens;		/* number of opens */
 static int dtrace_helpers;		/* number of helpers */
+static int dtrace_getf;			/* number of unpriv getf()s */
 static void *dtrace_softstate;		/* softstate pointer */
 static dtrace_hash_t *dtrace_bymod;	/* probes hashed by module */
 static dtrace_hash_t *dtrace_byfunc;	/* probes hashed by function */
@@ -267,17 +268,22 @@ dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
 
 /*
  * DTrace Helper Tracing Variables
+ *
+ * These variables should be set dynamically to enable helper tracing. The
+ * only variables that should be set are dtrace_helptrace_enable (which should
+ * be set to a non-zero value to allocate helper tracing buffers on the next
+ * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a
+ * non-zero value to deallocate helper tracing buffers on the next close of
+ * /dev/dtrace). When (and only when) helper tracing is disabled, the
+ * buffer size may also be set via dtrace_helptrace_bufsize.
  */
-uint32_t dtrace_helptrace_next = 0;
-uint32_t dtrace_helptrace_nlocals;
-char *dtrace_helptrace_buffer;
-int dtrace_helptrace_bufsize = 512 * 1024;
-
-#ifdef DEBUG
-int dtrace_helptrace_enabled = 1;
-#else
-int dtrace_helptrace_enabled = 0;
-#endif
+int dtrace_helptrace_enable = 0;
+int dtrace_helptrace_disable = 0;
+int dtrace_helptrace_bufsize = 16 * 1024 * 1024;
+uint32_t dtrace_helptrace_nlocals;
+static dtrace_helptrace_t *dtrace_helptrace_buffer;
+static uint32_t dtrace_helptrace_next = 0;
+static int dtrace_helptrace_wrapped = 0;
 
 /*
  * DTrace Error Hashing
@@ -373,8 +379,8 @@ static kmutex_t dtrace_errlock;
  * disallow all negative sizes. Ranges of size 0 are allowed.
  */
 #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
-	((testaddr) - (baseaddr) < (basesz) && \
-	(testaddr) + (testsz) - (baseaddr) <= (basesz) && \
+	((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
+	(testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
 	(testaddr) + (testsz) >= (testaddr))
 
 /*
@@ -475,6 +481,8 @@ static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
     dtrace_optval_t);
 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
+static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
+static void dtrace_getf_barrier(void);
 
 /*
  * DTrace Probe Context Functions
@@ -619,7 +627,7 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
-	if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
+	if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
	    vstate->dtvs_dynvars.dtds_size)) {
		dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
		uintptr_t base = (uintptr_t)dstate->dtds_base +
@@ -686,6 +694,7 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
     dtrace_vstate_t *vstate)
 {
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
+	file_t *fp;
 
	/*
	 * If we hold the privilege to read from kernel memory, then
@@ -703,10 +712,99 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
	/*
	 * We're allowed to read from our own string table.
	 */
-	if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
+	if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
	    mstate->dtms_difo->dtdo_strlen))
		return (1);
 
+	if (vstate->dtvs_state != NULL &&
+	    dtrace_priv_proc(vstate->dtvs_state, mstate)) {
+		proc_t *p;
+
+		/*
+		 * When we have privileges to the current process, there are
+		 * several context-related kernel structures that are safe to
+		 * read, even absent the privilege to read from kernel memory.
+		 * These reads are safe because these structures contain only
+		 * state that (1) we're permitted to read, (2) is harmless or
+		 * (3) contains pointers to additional kernel state that we're
+		 * not permitted to read (and as such, do not present an
+		 * opportunity for privilege escalation). Finally (and
+		 * critically), because of the nature of their relation with
+		 * the current thread context, the memory associated with these
+		 * structures cannot change over the duration of probe context,
+		 * and it is therefore impossible for this memory to be
+		 * deallocated and reallocated as something else while it's
+		 * being operated upon.
+		 */
+		if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t)))
+			return (1);
+
+		if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
+		    sz, curthread->t_procp, sizeof (proc_t))) {
+			return (1);
+		}
+
+		if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
+		    curthread->t_cred, sizeof (cred_t))) {
+			return (1);
+		}
+
+		if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
+		    &(p->p_pidp->pid_id), sizeof (pid_t))) {
+			return (1);
+		}
+
+		if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
+		    curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
+			return (1);
+		}
+	}
+
+	if ((fp = mstate->dtms_getf) != NULL) {
+		uintptr_t psz = sizeof (void *);
+		vnode_t *vp;
+		vnodeops_t *op;
+
+		/*
+		 * When getf() returns a file_t, the enabling is implicitly
+		 * granted the (transient) right to read the returned file_t
+		 * as well as the v_path and v_op->vnop_name of the underlying
+		 * vnode. These accesses are allowed after a successful
+		 * getf() because the members that they refer to cannot change
+		 * once set -- and the barrier logic in the kernel's closef()
+		 * path assures that the file_t and its referenced vnode_t
+		 * cannot themselves be stale (that is, it is impossible for
+		 * either dtms_getf itself or its f_vnode member to reference
+		 * freed memory).
+		 */
+		if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t)))
+			return (1);
+
+		if ((vp = fp->f_vnode) != NULL) {
+			if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz))
+				return (1);
+
+			if (vp->v_path != NULL && DTRACE_INRANGE(addr, sz,
+			    vp->v_path, strlen(vp->v_path) + 1)) {
+				return (1);
+			}
+
+			if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz))
+				return (1);
+
+			if ((op = vp->v_op) != NULL &&
+			    DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
+				return (1);
+			}
+
+			if (op != NULL && op->vnop_name != NULL &&
+			    DTRACE_INRANGE(addr, sz, op->vnop_name,
+			    strlen(op->vnop_name) + 1)) {
+				return (1);
+			}
+		}
+	}
+
	DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
	*illval = addr;
	return (0);
@@ -746,7 +844,7 @@ static int
 dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
     dtrace_vstate_t *vstate)
 {
-	size_t sz;
+	size_t sz, strsize;
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);
 
	/*
@@ -756,11 +854,24 @@ dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate,
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (1);
 
-	if (type->dtdt_kind == DIF_TYPE_STRING)
-		sz = dtrace_strlen(src,
-		    vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1;
-	else
+	if (type->dtdt_kind == DIF_TYPE_STRING) {
+		dtrace_state_t *state = vstate->dtvs_state;
+
+		if (state != NULL) {
+			strsize = state->dts_options[DTRACEOPT_STRSIZE];
+		} else {
+			/*
+			 * In helper context, we have a NULL state; fall back
+			 * to using the system-wide default for the string size
+			 * in this case.
+			 */
+			strsize = dtrace_strsize_default;
+		}
+
+		sz = dtrace_strlen(src, strsize) + 1;
+	} else {
		sz = type->dtdt_size;
+	}
 
	return (dtrace_canload((uintptr_t)src, sz, mstate, vstate));
 }
 
@@ -1085,8 +1196,7 @@ dtrace_priv_proc_common_zone(dtrace_state_t *state)
	 */
	ASSERT(s_cr != NULL);
 
-	if ((cr = CRED()) != NULL &&
-	    s_cr->cr_zone == cr->cr_zone)
+	if ((cr = CRED()) != NULL && s_cr->cr_zone == cr->cr_zone)
		return (1);
 
	return (0);
@@ -1209,19 +1319,17 @@ dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
		mode = pops->dtps_mode(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);
 
-		ASSERT((mode & DTRACE_MODE_USER) ||
-		    (mode & DTRACE_MODE_KERNEL));
-		ASSERT((mode & DTRACE_MODE_NOPRIV_RESTRICT) ||
-		    (mode & DTRACE_MODE_NOPRIV_DROP));
+		ASSERT(mode & (DTRACE_MODE_USER | DTRACE_MODE_KERNEL));
+		ASSERT(mode & (DTRACE_MODE_NOPRIV_RESTRICT |
+		    DTRACE_MODE_NOPRIV_DROP));
	}
 
	/*
	 * If the dte_cond bits indicate that this consumer is only allowed to
-	 * see user-mode firings of this probe, call the provider's dtps_mode()
-	 * entry point to check that the probe was fired while in a user
-	 * context. If that's not the case, use the policy specified by the
-	 * provider to determine if we drop the probe or merely restrict
-	 * operation.
+	 * see user-mode firings of this probe, check that the probe was fired
+	 * while in a user context. If that's not the case, use the policy
+	 * specified by the provider to determine if we drop the probe or
+	 * merely restrict operation.
	 */
	if (ecb->dte_cond & DTRACE_COND_USERMODE) {
		ASSERT(mode != DTRACE_MODE_NOPRIV_DROP);
@@ -1288,6 +1396,15 @@ dtrace_priv_probe(dtrace_state_t *state, dtrace_mstate_t *mstate,
		}
	}
 
+	/*
+	 * By merits of being in this code path at all, we have limited
+	 * privileges. If the provider has indicated that limited privileges
+	 * are to denote restricted operation, strip off the ability to access
+	 * arguments.
+	 */
+	if (mode & DTRACE_MODE_LIMITEDPRIV_RESTRICT)
+		mstate->dtms_access &= ~DTRACE_ACCESS_ARGS;
+
	return (1);
 }
 
@@ -2924,7 +3041,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
	}
 
	case DIF_VAR_CURTHREAD:
-		if (!dtrace_priv_kernel(state))
+		if (!dtrace_priv_proc(state, mstate))
			return (0);
		return ((uint64_t)(uintptr_t)curthread);
 
@@ -4452,11 +4569,35 @@ case DIF_SUBR_GETMAJOR:
		break;
	}
 
+	case DIF_SUBR_GETF: {
+		uintptr_t fd = tupregs[0].dttk_value;
+		uf_info_t *finfo = &curthread->t_procp->p_user.u_finfo;
+		file_t *fp;
+
+		if (!dtrace_priv_proc(state, mstate)) {
+			regs[rd] = NULL;
+			break;
+		}
+
+		/*
+		 * This is safe because fi_nfiles only increases, and the
+		 * fi_list array is not freed when the array size doubles.
+		 * (See the comment in flist_grow() for details on the
+		 * management of the u_finfo structure.)
+		 */
+		fp = fd < finfo->fi_nfiles ?
+		    finfo->fi_list[fd].uf_file : NULL;
+
+		mstate->dtms_getf = fp;
+		regs[rd] = (uintptr_t)fp;
+		break;
+	}
+
	case DIF_SUBR_CLEANPATH: {
		char *dest = (char *)mstate->dtms_scratch_ptr, c;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;
		int i = 0, j = 0;
+		zone_t *z;
 
		if (!dtrace_strcanload(src, size, mstate, vstate)) {
			regs[rd] = NULL;
			break;
		}
@@ -4555,6 +4696,23 @@ next:
		} while (c != '\0');
 
		dest[j] = '\0';
+
+		if (mstate->dtms_getf != NULL &&
+		    !(mstate->dtms_access & DTRACE_ACCESS_KERNEL) &&
+		    (z = state->dts_cred.dcr_cred->cr_zone) != kcred->cr_zone) {
+			/*
+			 * If we've done a getf() as a part of this ECB and we
+			 * don't have kernel access (and we're not in the global
+			 * zone), check if the path we cleaned up begins with
+			 * the zone's root path, and trim it off if so. Note
+			 * that this is an output cleanliness issue, not a
+			 * security issue: knowing one's zone root path does
+			 * not enable privilege escalation.
+			 */
+			if (strstr(dest, z->zone_rootpath) == dest)
+				dest += strlen(z->zone_rootpath) - 1;
+		}
+
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
@@ -4939,71 +5097,50 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
			pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_RLDSB:
-			if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
-				*flags |= CPU_DTRACE_KPRIV;
-				*illval = regs[r1];
+			if (!dtrace_canload(regs[r1], 1, mstate, vstate))
				break;
-			}
			/*FALLTHROUGH*/
		case DIF_OP_LDSB:
			regs[rd] = (int8_t)dtrace_load8(regs[r1]);
			break;
		case DIF_OP_RLDSH:
-			if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
-				*flags |= CPU_DTRACE_KPRIV;
-				*illval = regs[r1];
+			if (!dtrace_canload(regs[r1], 2, mstate, vstate))
				break;
-			}
			/*FALLTHROUGH*/
		case DIF_OP_LDSH:
			regs[rd] = (int16_t)dtrace_load16(regs[r1]);
			break;
		case DIF_OP_RLDSW:
-			if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
-				*flags |= CPU_DTRACE_KPRIV;
-				*illval = regs[r1];
+			if (!dtrace_canload(regs[r1], 4, mstate, vstate))
				break;
-			}
			/*FALLTHROUGH*/
		case DIF_OP_LDSW:
			regs[rd] = (int32_t)dtrace_load32(regs[r1]);
			break;
		case DIF_OP_RLDUB:
-			if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
-				*flags |= CPU_DTRACE_KPRIV;
-				*illval = regs[r1];
+			if (!dtrace_canload(regs[r1], 1, mstate, vstate))
				break;
-			}
			/*FALLTHROUGH*/
		case DIF_OP_LDUB:
			regs[rd] = dtrace_load8(regs[r1]);
			break;
		case DIF_OP_RLDUH:
-			if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
-				*flags |= CPU_DTRACE_KPRIV;
-				*illval = regs[r1];
+			if (!dtrace_canload(regs[r1], 2, mstate, vstate))
				break;
-			}
			/*FALLTHROUGH*/
		case DIF_OP_LDUH:
			regs[rd] = dtrace_load16(regs[r1]);
			break;
		case DIF_OP_RLDUW:
-			if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
-				*flags |= CPU_DTRACE_KPRIV;
-				*illval = regs[r1];
+			if (!dtrace_canload(regs[r1], 4, mstate, vstate))
				break;
-			}
			/*FALLTHROUGH*/
		case DIF_OP_LDUW:
			regs[rd] = dtrace_load32(regs[r1]);
			break;
		case DIF_OP_RLDX:
-			if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
-				*flags |= CPU_DTRACE_KPRIV;
-				*illval = regs[r1];
+			if (!dtrace_canload(regs[r1], 8, mstate, vstate))
				break;
-			}
			/*FALLTHROUGH*/
		case DIF_OP_LDX:
			regs[rd] = dtrace_load64(regs[r1]);
@@ -5940,6 +6077,8 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
 
		mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
		mstate.dtms_access = DTRACE_ACCESS_ARGS | DTRACE_ACCESS_PROC;
+		mstate.dtms_getf = NULL;
+
		*flags &= ~CPU_DTRACE_ERROR;
 
		if (prov == dtrace_provider) {
@@ -6736,7 +6875,7 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
		priv = DTRACE_PRIV_ALL;
	} else {
		*uidp = crgetuid(cr);
-		*zoneidp = crgetzoneid(cr);
+		*zoneidp = crgetzonedid(cr);
		priv = 0;
 
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
@@ -7232,7 +7371,7 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
	provider->dtpv_priv.dtpp_flags = priv;
	if (cr != NULL) {
		provider->dtpv_priv.dtpp_uid = crgetuid(cr);
-		provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
+		provider->dtpv_priv.dtpp_zoneid = crgetzonedid(cr);
	}
	provider->dtpv_pops = *pops;
 
@@ -7843,6 +7982,7 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;
+	dtrace_state_t *state = enab->dten_vstate->dtvs_state;
 
	ASSERT(MUTEX_HELD(&dtrace_lock));
	dtrace_ecb_create_cache = NULL;
@@ -7857,8 +7997,22 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
	}
 
	dtrace_probekey(desc, &pkey);
-	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
-	    &priv, &uid, &zoneid);
+	dtrace_cred2priv(state->dts_cred.dcr_cred, &priv, &uid, &zoneid);
+
+	if ((priv & DTRACE_PRIV_ZONEOWNER) &&
+	    state->dts_options[DTRACEOPT_ZONE] != DTRACEOPT_UNSET) {
+		/*
+		 * If we have the privilege of instrumenting all zones but we
+		 * have been told to instrument but one, we will spoof this up
+		 * by depriving ourselves of DTRACE_PRIV_ZONEOWNER for purposes
+		 * of dtrace_match(). (Note that DTRACEOPT_ZONE is not for
+		 * security but rather for performance: it allows the global
+		 * zone to instrument USDT probes in a local zone without
+		 * requiring all zones to be instrumented.)
+		 */
+		priv &= ~DTRACE_PRIV_ZONEOWNER;
+		zoneid = state->dts_options[DTRACEOPT_ZONE];
+	}
 
	return (dtrace_match(&pkey, priv, uid, zoneid,
	    dtrace_ecb_create_enable, enab));
@@ -8443,6 +8597,20 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
			    subr == DIF_SUBR_COPYOUTSTR) {
				dp->dtdo_destructive = 1;
			}
+
+			if (subr == DIF_SUBR_GETF) {
+				/*
+				 * If we have a getf(), we need to record that
+				 * in our state. Note that our state can be
+				 * NULL if this is a helper -- but in that
+				 * case, the call to getf() is itself illegal,
+				 * and will be caught (slightly later) when
+				 * the helper is validated.
+				 */
+				if (vstate->dtvs_state != NULL)
+					vstate->dtvs_state->dts_getf++;
+			}
+
			break;
		case DIF_OP_PUSHTR:
			if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
@@ -13085,6 +13253,22 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
 
	state->dts_activity = DTRACE_ACTIVITY_WARMUP;
 
+	if (state->dts_getf != 0 &&
+	    !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
+		/*
+		 * We don't have kernel privs but we have at least one call
+		 * to getf(); we need to bump our zone's count, and (if
+		 * this is the first enabling to have an unprivileged call
+		 * to getf()) we need to hook into closef().
+		 */
+		state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf++;
+
+		if (dtrace_getf++ == 0) {
+			ASSERT(dtrace_closef == NULL);
+			dtrace_closef = dtrace_getf_barrier;
+		}
+	}
+
	/*
	 * Now it's time to actually fire the BEGIN probe. We need to disable
	 * interrupts here both to record the CPU on which we fired the BEGIN
@@ -13201,6 +13385,24 @@ dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
 
	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();
 
+	if (state->dts_getf != 0 &&
+	    !(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)) {
+		/*
+		 * We don't have kernel privs but we have at least one call
+		 * to getf(); we need to lower our zone's count, and (if
+		 * this is the last enabling to have an unprivileged call
+		 * to getf()) we need to clear the closef() hook.
+		 */
+		ASSERT(state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf > 0);
+		ASSERT(dtrace_closef == dtrace_getf_barrier);
+		ASSERT(dtrace_getf > 0);
+
+		state->dts_cred.dcr_cred->cr_zone->zone_dtrace_getf--;
+
+		if (--dtrace_getf == 0)
+			dtrace_closef = NULL;
+	}
+
	return (0);
 }
 
@@ -13507,10 +13709,10 @@ dtrace_helper_trace(dtrace_helper_action_t *helper,
     dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
 {
	uint32_t size, next, nnext, i;
-	dtrace_helptrace_t *ent;
+	dtrace_helptrace_t *ent, *buffer;
	uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
 
-	if (!dtrace_helptrace_enabled)
+	if ((buffer = dtrace_helptrace_buffer) == NULL)
		return;
 
	ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);
@@ -13538,10 +13740,12 @@ dtrace_helper_trace(dtrace_helper_action_t *helper,
	/*
	 * We have our slot; fill it in.
	 */
-	if (nnext == size)
+	if (nnext == size) {
+		dtrace_helptrace_wrapped++;
		next = 0;
+	}
 
-	ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
+	ent = (dtrace_helptrace_t *)((uintptr_t)buffer + next);
	ent->dtht_helper = helper;
	ent->dtht_where = where;
	ent->dtht_nlocals = vstate->dtvs_nlocals;
@@ -13575,7 +13779,7 @@ dtrace_helper(int which, dtrace_mstate_t *mstate,
	dtrace_helper_action_t *helper;
	dtrace_vstate_t *vstate;
	dtrace_difo_t *pred;
-	int i, trace = dtrace_helptrace_enabled;
+	int i, trace = dtrace_helptrace_buffer != NULL;
 
	ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
 
@@ -14761,6 +14965,23 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
	dtrace_toxranges++;
 }
 
+static void
+dtrace_getf_barrier()
+{
+	/*
+	 * When we have unprivileged (that is, non-DTRACE_CRV_KERNEL) enablings
+	 * that contain calls to getf(), this routine will be called on every
+	 * closef() before either the underlying vnode is released or the
+	 * file_t itself is freed. By the time we are here, it is essential
+	 * that the file_t can no longer be accessed from a call to getf()
+	 * in probe context -- that assures that a dtrace_sync() can be used
+	 * to clear out any enablings referring to the old structures.
+	 */
+	if (curthread->t_procp->p_zone->zone_dtrace_getf != 0 ||
+	    kcred->cr_zone->zone_dtrace_getf != 0)
+		dtrace_sync();
+}
+
 /*
  * DTrace Driver Cookbook Functions
  */
@@ -14875,17 +15096,6 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
	mutex_exit(&cpu_lock);
 
	/*
-	 * If DTrace helper tracing is enabled, we need to allocate the
-	 * trace buffer and initialize the values.
-	 */
-	if (dtrace_helptrace_enabled) {
-		ASSERT(dtrace_helptrace_buffer == NULL);
-		dtrace_helptrace_buffer =
-		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
-		dtrace_helptrace_next = 0;
-	}
-
-	/*
	 * If there are already providers, we must ask them to provide their
	 * probes, and then match any anonymous enabling against them. Note
	 * that there should be no other retained enablings at this time:
@@ -14981,6 +15191,18 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
		return (EBUSY);
	}
 
+	if (dtrace_helptrace_enable && dtrace_helptrace_buffer == NULL) {
+		/*
+		 * If DTrace helper tracing is enabled, we need to allocate the
+		 * trace buffer and initialize the values.
+		 */
+		dtrace_helptrace_buffer =
+		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
+		dtrace_helptrace_next = 0;
+		dtrace_helptrace_wrapped = 0;
+		dtrace_helptrace_enable = 0;
+	}
+
	state = dtrace_state_create(devp, cred_p);
	mutex_exit(&cpu_lock);
 
@@ -15002,6 +15224,7 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
 {
	minor_t minor = getminor(dev);
	dtrace_state_t *state;
+	dtrace_helptrace_t *buf = NULL;
 
	if (minor == DTRACEMNRN_HELPER)
		return (0);
@@ -15019,6 +15242,18 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
		dtrace_state_destroy(state->dts_anon);
	}
 
+	if (dtrace_helptrace_disable) {
+		/*
+		 * If we have been told to disable helper tracing, set the
+		 * buffer to NULL before calling into dtrace_state_destroy();
+		 * we take advantage of its dtrace_sync() to know that no
+		 * CPU is in probe context with enabled helper tracing
+		 * after it returns.
+		 */
+		buf = dtrace_helptrace_buffer;
+		dtrace_helptrace_buffer = NULL;
+	}
+
	dtrace_state_destroy(state);
	ASSERT(dtrace_opens > 0);
 
@@ -15029,6 +15264,11 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
 
+	if (buf != NULL) {
+		kmem_free(buf, dtrace_helptrace_bufsize);
+		dtrace_helptrace_disable = 0;
+	}
+
	mutex_exit(&dtrace_lock);
	mutex_exit(&cpu_lock);
 
@@ -15917,12 +16157,10 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
	dtrace_modload = NULL;
	dtrace_modunload = NULL;
 
-	mutex_exit(&cpu_lock);
+	ASSERT(dtrace_getf == 0);
+	ASSERT(dtrace_closef == NULL);
 
-	if (dtrace_helptrace_enabled) {
-		kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
-		dtrace_helptrace_buffer = NULL;
-	}
+	mutex_exit(&cpu_lock);
 
	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
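
For a sense of what the new getf() subroutine makes possible, here is a minimal, hypothetical D script. It is not part of this change, and the choice of probe and the use of arg0 as the file descriptor are assumptions made purely for illustration; it simply exercises the accesses that the dtrace_canload() changes above grant to an enabling that has called getf(): the returned file_t, its f_vnode, and the vnode's v_path.

	/* Illustrative only: report the path backing each read(2), when known. */
	syscall::read:entry
	{
		this->fp = getf(arg0);	/* arg0 is the file descriptor */
	}

	syscall::read:entry
	/this->fp != NULL && this->fp->f_vnode != NULL &&
	    this->fp->f_vnode->v_path != NULL/
	{
		printf("%s (pid %d) read %s\n", execname, pid,
		    stringof(this->fp->f_vnode->v_path));
	}

Note also that, per the comment added near the top of the diff, helper tracing is now toggled at run time rather than at compile time: setting dtrace_helptrace_enable to a non-zero value causes the trace buffer to be allocated on the next open of /dev/dtrace, and setting dtrace_helptrace_disable causes it to be freed on the next close.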