summary | refs | log | tree | commit | diff
path: root/usr/src/uts/common/dtrace/dtrace.c
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts/common/dtrace/dtrace.c')
-rw-r--r-- usr/src/uts/common/dtrace/dtrace.c 227
1 files changed, 127 insertions, 100 deletions
diff --git a/usr/src/uts/common/dtrace/dtrace.c b/usr/src/uts/common/dtrace/dtrace.c
index 8538331d59..32d0967772 100644
--- a/usr/src/uts/common/dtrace/dtrace.c
+++ b/usr/src/uts/common/dtrace/dtrace.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@@ -2513,9 +2514,10 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
{
dtrace_speculation_t *spec;
dtrace_buffer_t *src, *dest;
- uintptr_t daddr, saddr, dlimit;
+ uintptr_t daddr, saddr, dlimit, slimit;
dtrace_speculation_state_t current, new;
intptr_t offs;
+ uint64_t timestamp;
if (which == 0)
return;
@@ -2591,7 +2593,37 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
}
/*
- * We have the space; copy the buffer across. (Note that this is a
+ * We have sufficient space to copy the speculative buffer into the
+ * primary buffer. First, modify the speculative buffer, filling
+ * in the timestamp of all entries with the current time. The data
+ * must have the commit() time rather than the time it was traced,
+ * so that all entries in the primary buffer are in timestamp order.
+ */
+ timestamp = dtrace_gethrtime();
+ saddr = (uintptr_t)src->dtb_tomax;
+ slimit = saddr + src->dtb_offset;
+ while (saddr < slimit) {
+ size_t size;
+ dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;
+
+ if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
+ saddr += sizeof (dtrace_epid_t);
+ continue;
+ }
+ ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs);
+ size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;
+
+ ASSERT3U(saddr + size, <=, slimit);
+ ASSERT3U(size, >=, sizeof (dtrace_rechdr_t));
+ ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX);
+
+ DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);
+
+ saddr += size;
+ }
+
+ /*
+ * Copy the buffer across. (Note that this is a
* highly subobtimal bcopy(); in the unlikely event that this becomes
* a serious performance issue, a high-performance DTrace-specific
* bcopy() should obviously be invented.)
@@ -6085,7 +6117,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
if (now - state->dts_alive > dtrace_deadman_timeout) {
/*
* We seem to be dead. Unless we (a) have kernel
- * destructive permissions (b) have expicitly enabled
+ * destructive permissions (b) have explicitly enabled
* destructive actions and (c) destructive actions have
* not been disabled, we're going to transition into
* the KILLED state, from which no further processing
@@ -6113,8 +6145,18 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
tomax = buf->dtb_tomax;
ASSERT(tomax != NULL);
- if (ecb->dte_size != 0)
- DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid);
+ if (ecb->dte_size != 0) {
+ dtrace_rechdr_t dtrh;
+ if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
+ mstate.dtms_timestamp = dtrace_gethrtime();
+ mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
+ }
+ ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t));
+ dtrh.dtrh_epid = ecb->dte_epid;
+ DTRACE_RECORD_STORE_TIMESTAMP(&dtrh,
+ mstate.dtms_timestamp);
+ *((dtrace_rechdr_t *)(tomax + offs)) = dtrh;
+ }
mstate.dtms_epid = ecb->dte_epid;
mstate.dtms_present |= DTRACE_MSTATE_EPID;
@@ -6278,7 +6320,9 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
continue;
switch (act->dta_kind) {
- case DTRACEACT_SPECULATE:
+ case DTRACEACT_SPECULATE: {
+ dtrace_rechdr_t *dtrh;
+
ASSERT(buf == &state->dts_buffer[cpuid]);
buf = dtrace_speculation_buffer(state,
cpuid, val);
@@ -6300,10 +6344,23 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
tomax = buf->dtb_tomax;
ASSERT(tomax != NULL);
- if (ecb->dte_size != 0)
- DTRACE_STORE(uint32_t, tomax, offs,
- ecb->dte_epid);
+ if (ecb->dte_size == 0)
+ continue;
+
+ ASSERT3U(ecb->dte_size, >=,
+ sizeof (dtrace_rechdr_t));
+ dtrh = ((void *)(tomax + offs));
+ dtrh->dtrh_epid = ecb->dte_epid;
+ /*
+ * When the speculation is committed, all of
+ * the records in the speculative buffer will
+ * have their timestamps set to the commit
+ * time. Until then, it is set to a sentinel
+ * value, for debuggability.
+ */
+ DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);
continue;
+ }
case DTRACEACT_CHILL:
if (dtrace_priv_kernel_destructive(state))
@@ -9532,9 +9589,9 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
/*
* The default size is the size of the default action: recording
- * the epid.
+ * the header.
*/
- ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
+ ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t);
ecb->dte_alignment = sizeof (dtrace_epid_t);
epid = state->dts_epid++;
@@ -9633,122 +9690,89 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
static void
dtrace_ecb_resize(dtrace_ecb_t *ecb)
{
- uint32_t maxalign = sizeof (dtrace_epid_t);
- uint32_t align = sizeof (uint8_t), offs, diff;
dtrace_action_t *act;
- int wastuple = 0;
+ uint32_t curneeded = UINT32_MAX;
uint32_t aggbase = UINT32_MAX;
- dtrace_state_t *state = ecb->dte_state;
/*
- * If we record anything, we always record the epid. (And we always
- * record it first.)
+ * If we record anything, we always record the dtrace_rechdr_t. (And
+ * we always record it first.)
*/
- offs = sizeof (dtrace_epid_t);
- ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
+ ecb->dte_size = sizeof (dtrace_rechdr_t);
+ ecb->dte_alignment = sizeof (dtrace_epid_t);
for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
dtrace_recdesc_t *rec = &act->dta_rec;
+ ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1);
- if ((align = rec->dtrd_alignment) > maxalign)
- maxalign = align;
-
- if (!wastuple && act->dta_intuple) {
- /*
- * This is the first record in a tuple. Align the
- * offset to be at offset 4 in an 8-byte aligned
- * block.
- */
- diff = offs + sizeof (dtrace_aggid_t);
-
- if (diff = (diff & (sizeof (uint64_t) - 1)))
- offs += sizeof (uint64_t) - diff;
-
- aggbase = offs - sizeof (dtrace_aggid_t);
- ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
- }
-
- /*LINTED*/
- if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
- /*
- * The current offset is not properly aligned; align it.
- */
- offs += align - diff;
- }
-
- rec->dtrd_offset = offs;
-
- if (offs + rec->dtrd_size > ecb->dte_needed) {
- ecb->dte_needed = offs + rec->dtrd_size;
-
- if (ecb->dte_needed > state->dts_needed)
- state->dts_needed = ecb->dte_needed;
- }
+ ecb->dte_alignment = MAX(ecb->dte_alignment,
+ rec->dtrd_alignment);
if (DTRACEACT_ISAGG(act->dta_kind)) {
dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
- dtrace_action_t *first = agg->dtag_first, *prev;
- ASSERT(rec->dtrd_size != 0 && first != NULL);
- ASSERT(wastuple);
+ ASSERT(rec->dtrd_size != 0);
+ ASSERT(agg->dtag_first != NULL);
+ ASSERT(act->dta_prev->dta_intuple);
ASSERT(aggbase != UINT32_MAX);
+ ASSERT(curneeded != UINT32_MAX);
agg->dtag_base = aggbase;
- while ((prev = first->dta_prev) != NULL &&
- DTRACEACT_ISAGG(prev->dta_kind)) {
- agg = (dtrace_aggregation_t *)prev;
- first = agg->dtag_first;
- }
+ curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
+ rec->dtrd_offset = curneeded;
+ curneeded += rec->dtrd_size;
+ ecb->dte_needed = MAX(ecb->dte_needed, curneeded);
- if (prev != NULL) {
- offs = prev->dta_rec.dtrd_offset +
- prev->dta_rec.dtrd_size;
- } else {
- offs = sizeof (dtrace_epid_t);
- }
- wastuple = 0;
+ aggbase = UINT32_MAX;
+ curneeded = UINT32_MAX;
+ } else if (act->dta_intuple) {
+ if (curneeded == UINT32_MAX) {
+ /*
+ * This is the first record in a tuple. Align
+ * curneeded to be at offset 4 in an 8-byte
+ * aligned block.
+ */
+ ASSERT(act->dta_prev == NULL ||
+ !act->dta_prev->dta_intuple);
+ ASSERT3U(aggbase, ==, UINT32_MAX);
+ curneeded = P2PHASEUP(ecb->dte_size,
+ sizeof (uint64_t), sizeof (dtrace_aggid_t));
+
+ aggbase = curneeded - sizeof (dtrace_aggid_t);
+ ASSERT(IS_P2ALIGNED(aggbase,
+ sizeof (uint64_t)));
+ }
+ curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
+ rec->dtrd_offset = curneeded;
+ curneeded += rec->dtrd_size;
} else {
- if (!act->dta_intuple)
- ecb->dte_size = offs + rec->dtrd_size;
+ /* tuples must be followed by an aggregation */
+ ASSERT(act->dta_prev == NULL ||
+ !act->dta_prev->dta_intuple);
- offs += rec->dtrd_size;
+ ecb->dte_size = P2ROUNDUP(ecb->dte_size,
+ rec->dtrd_alignment);
+ rec->dtrd_offset = ecb->dte_size;
+ ecb->dte_size += rec->dtrd_size;
+ ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size);
}
-
- wastuple = act->dta_intuple;
}
if ((act = ecb->dte_action) != NULL &&
!(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
- ecb->dte_size == sizeof (dtrace_epid_t)) {
+ ecb->dte_size == sizeof (dtrace_rechdr_t)) {
/*
- * If the size is still sizeof (dtrace_epid_t), then all
+ * If the size is still sizeof (dtrace_rechdr_t), then all
* actions store no data; set the size to 0.
*/
- ecb->dte_alignment = maxalign;
ecb->dte_size = 0;
-
- /*
- * If the needed space is still sizeof (dtrace_epid_t), then
- * all actions need no additional space; set the needed
- * size to 0.
- */
- if (ecb->dte_needed == sizeof (dtrace_epid_t))
- ecb->dte_needed = 0;
-
- return;
}
- /*
- * Set our alignment, and make sure that the dte_size and dte_needed
- * are aligned to the size of an EPID.
- */
- ecb->dte_alignment = maxalign;
- ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) &
- ~(sizeof (dtrace_epid_t) - 1);
- ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) &
- ~(sizeof (dtrace_epid_t) - 1);
- ASSERT(ecb->dte_size <= ecb->dte_needed);
+ ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t));
+ ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t)));
+ ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed,
+ ecb->dte_needed);
}
static dtrace_action_t *
@@ -10118,7 +10142,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
break;
case DTRACEACT_SPECULATE:
- if (ecb->dte_size > sizeof (dtrace_epid_t))
+ if (ecb->dte_size > sizeof (dtrace_rechdr_t))
return (EINVAL);
if (dp == NULL)
@@ -10231,7 +10255,7 @@ dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
ecb->dte_action = NULL;
ecb->dte_action_last = NULL;
- ecb->dte_size = sizeof (dtrace_epid_t);
+ ecb->dte_size = 0;
}
static void
@@ -10502,12 +10526,13 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)
caddr_t tomax = buf->dtb_tomax;
caddr_t xamot = buf->dtb_xamot;
dtrace_icookie_t cookie;
- hrtime_t now = dtrace_gethrtime();
+ hrtime_t now;
ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
cookie = dtrace_interrupt_disable();
+ now = dtrace_gethrtime();
buf->dtb_tomax = xamot;
buf->dtb_xamot = tomax;
buf->dtb_xamot_drops = buf->dtb_drops;
@@ -10802,7 +10827,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
if (epid == DTRACE_EPIDNONE) {
size = sizeof (uint32_t);
} else {
- ASSERT(epid <= state->dts_necbs);
+ ASSERT3U(epid, <=, state->dts_necbs);
ASSERT(state->dts_ecbs[epid - 1] != NULL);
size = state->dts_ecbs[epid - 1]->dte_size;
@@ -15837,6 +15862,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
desc.dtbd_drops = buf->dtb_drops;
desc.dtbd_errors = buf->dtb_errors;
desc.dtbd_oldest = buf->dtb_xamot_offset;
+ desc.dtbd_timestamp = dtrace_gethrtime();
mutex_exit(&dtrace_lock);
@@ -15889,6 +15915,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
desc.dtbd_drops = buf->dtb_xamot_drops;
desc.dtbd_errors = buf->dtb_xamot_errors;
desc.dtbd_oldest = 0;
+ desc.dtbd_timestamp = buf->dtb_switched;
mutex_exit(&dtrace_lock);