path: root/usr/src/uts/common/os/cyclic.c
author    Madhavan Venkataraman <Madhavan.Venkataraman@Sun.COM>  2008-11-09 14:37:17 -0800
committer Madhavan Venkataraman <Madhavan.Venkataraman@Sun.COM>  2008-11-09 14:37:17 -0800
commit    87a18d3f5ba5da1985af86439cf1c94a9118b665 (patch)
tree      4b3372d4ded27d65645f402df06ba6d1711cb6c5 /usr/src/uts/common/os/cyclic.c
parent    e857d0f3f579b4f19712d7713099feb8af0bad49 (diff)
download  illumos-joyent-87a18d3f5ba5da1985af86439cf1c94a9118b665.tar.gz
6565503 callout processing is single threaded, throttling applications that rely on scalable callouts
6311743 callout table lock contention in timeout and untimeout
Diffstat (limited to 'usr/src/uts/common/os/cyclic.c')
-rw-r--r--  usr/src/uts/common/os/cyclic.c  |  320
1 files changed, 313 insertions, 7 deletions
diff --git a/usr/src/uts/common/os/cyclic.c b/usr/src/uts/common/os/cyclic.c
index 61ac9ac5b8..1bb6baf445 100644
--- a/usr/src/uts/common/os/cyclic.c
+++ b/usr/src/uts/common/os/cyclic.c
@@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* The Cyclic Subsystem
* --------------------
@@ -109,6 +107,7 @@
* cyclic_add_omni() <-- Creates an omnipresent cyclic
* cyclic_remove() <-- Removes a cyclic
* cyclic_bind() <-- Change a cyclic's CPU or partition binding
+ * cyclic_reprogram() <-- Reprogram a cyclic's expiration
*
* Inter-subsystem Interfaces
*
@@ -550,6 +549,63 @@
* leverages the existing cyclic expiry processing, which will compensate
* for any time lost while juggling.
*
+ * Reprogramming
+ *
+ * Normally, after a cyclic fires, its next expiration is computed from
+ * the current time and the cyclic interval. But there are situations when
+ * the next expiration needs to be reprogrammed by the kernel subsystem that
+ * is using the cyclic. cyclic_reprogram() allows this to be done. This,
+ * unlike the other kernel at-large cyclic API functions, is permitted to
+ * be called from the cyclic handler. This is because it does not use the
+ * cpu_lock to serialize access.
+ *
+ * When cyclic_reprogram() is called for an omni-cyclic, the operation is
+ * applied to the omni-cyclic's component on the current CPU.
+ *
+ * If a high-level cyclic handler reprograms its own cyclic, then
+ * cyclic_fire() detects that and does not recompute the cyclic's next
+ * expiration. However, for a lock-level or a low-level cyclic, the
+ * actual cyclic handler will execute at the lower PIL only after
+ * cyclic_fire() is done with all expired cyclics. To deal with this, such
+ * cyclics can be specified with a special interval of CY_INFINITY (INT64_MAX).
+ * cyclic_fire() recognizes this special value and recomputes the next
+ * expiration to CY_INFINITY. This effectively moves the cyclic to the
+ * bottom of the heap and prevents it from going off until its handler has
+ * had a chance to reprogram it. In fact, this is the way to create and reuse
+ * "one-shot" timers in the context of the cyclic subsystem without using
+ * cyclic_remove().
+ *
+ * Here is the procedure for cyclic reprogramming:
+ *
+ * 1. cyclic_reprogram() calls cyclic_reprogram_xcall() on the CPU
+ * that houses the cyclic.
+ * 2. cyclic_reprogram_xcall() raises the interrupt level to CY_HIGH_LEVEL.
+ * 3. The cyclic is located in the cyclic heap. The search for this is
+ * done from the bottom of the heap to the top as reprogrammable cyclics
+ * would be located closer to the bottom than the top.
+ * 4. The cyclic expiration is set and the cyclic is moved to its
+ * correct position in the heap (up or down depending on whether the
+ * new expiration is less than or greater than the old one).
+ * 5. If the cyclic move modified the root of the heap, the backend is
+ * reprogrammed.
+ *
+ * Reprogramming can be a frequent event (see the callout subsystem). So,
+ * the serialization used has to be efficient. As with all other cyclic
+ * operations, the interrupt level is raised during reprogramming. Plus,
+ * during reprogramming, the cyclic must not be juggled (regular cyclic)
+ * or stopped (omni-cyclic). The implementation defines a per-cyclic
+ * reader-writer lock to accomplish this. This lock is acquired in the
+ * reader mode by cyclic_reprogram() and writer mode by cyclic_juggle() and
+ * cyclic_omni_stop(). The reader-writer lock keeps this efficient when
+ * an omni-cyclic is reprogrammed frequently on different CPUs, since
+ * concurrent reprograms take the lock only as readers.
+ *
+ * Note that since the cpu_lock is not used during reprogramming, it is
+ * the responsibility of the user of the reprogrammable cyclic to make sure
+ * that the cyclic is not removed via cyclic_remove() during reprogramming.
+ * This is not an unreasonable requirement as the user will typically have
+ * some sort of synchronization for its cyclic-related activities. This
+ * little caveat exists because the cyclic ID is not really an ID. It is
+ * implemented as a pointer to a structure.
*/
#include <sys/cyclic_impl.h>
#include <sys/sysmacros.h>
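The one-shot usage that the comment block above describes can be sketched from a client subsystem's point of view. The example below is not part of this change: the oneshot_* names are hypothetical, and it assumes the client-facing interfaces declared in sys/cyclic.h (cyclic_add(), cyclic_reprogram(), CY_INFINITY, CYCLIC_NONE) and that cpu_lock is held across cyclic_add(), as the subsystem requires. The handler may re-arm its own cyclic because cyclic_reprogram() is callable from handler context.

#include <sys/cyclic.h>
#include <sys/cpuvar.h>
#include <sys/mutex.h>
#include <sys/time.h>

static cyclic_id_t oneshot_id = CYCLIC_NONE;	/* hypothetical client state */

/*
 * Fires when the one-shot expires.  Because the cyclic was added with an
 * interval of CY_INFINITY, cyclic_fire() pushes it back to the bottom of
 * the heap after this handler runs; it will not fire again until someone
 * calls cyclic_reprogram().
 */
static void
oneshot_handler(void *arg)
{
	/* ... perform the timed work ... */

	/* Optionally re-arm for another shot, e.g. 10ms from now. */
	(void) cyclic_reprogram(oneshot_id,
	    gethrtime() + 10 * (NANOSEC / MILLISEC));
}

void
oneshot_init(void)
{
	cyc_handler_t hdlr;
	cyc_time_t when;

	hdlr.cyh_func = oneshot_handler;
	hdlr.cyh_arg = NULL;
	hdlr.cyh_level = CY_LOW_LEVEL;

	/*
	 * CY_INFINITY for both the initial expiration and the interval
	 * keeps the cyclic dormant until it is explicitly reprogrammed.
	 */
	when.cyt_when = CY_INFINITY;
	when.cyt_interval = CY_INFINITY;

	mutex_enter(&cpu_lock);		/* cyclic_add() requires cpu_lock */
	oneshot_id = cyclic_add(&hdlr, &when);
	mutex_exit(&cpu_lock);
}

/*
 * Arm the one-shot to fire 'delta' nanoseconds from now.
 */
void
oneshot_arm(hrtime_t delta)
{
	(void) cyclic_reprogram(oneshot_id, gethrtime() + delta);
}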
@@ -914,6 +970,27 @@ cyclic_fire(cpu_t *c)
cyclic_expire(cpu, ndx, cyclic);
/*
+ * If the handler reprogrammed the cyclic, then don't
+ * recompute the expiration. Otherwise, if the interval is
+ * infinity, set the expiration to infinity. This can be
+ * used to create one-shot timers.
+ */
+ if (exp != cyclic->cy_expire) {
+ /*
+ * If a hi level cyclic reprograms itself,
+ * the heap adjustment and reprogramming of the
+ * clock source have already been done at this
+ * point. So, we can continue.
+ */
+ continue;
+ }
+
+ if (cyclic->cy_interval == CY_INFINITY)
+ exp = CY_INFINITY;
+ else
+ exp += cyclic->cy_interval;
+
+ /*
* If this cyclic will be set to next expire in the distant
* past, we have one of two situations:
*
@@ -932,7 +1009,6 @@ cyclic_fire(cpu_t *c)
* debugger while still being longer than any legitimate
* stretch at CY_HIGH_LEVEL).
*/
- exp += cyclic->cy_interval;
if (now - exp > NANOSEC) {
hrtime_t interval = cyclic->cy_interval;
@@ -1676,20 +1752,22 @@ cyclic_remove_xcall(cyc_xcallarg_t *arg)
cyc_backend_t *be = cpu->cyp_backend;
cyb_arg_t bar = be->cyb_arg;
cyc_cookie_t cookie;
- cyc_index_t ndx = arg->cyx_ndx, nelems = cpu->cyp_nelems, i;
- cyc_index_t *heap = cpu->cyp_heap, last;
+ cyc_index_t ndx = arg->cyx_ndx, nelems, i;
+ cyc_index_t *heap, last;
cyclic_t *cyclic;
#ifdef DEBUG
cyc_index_t root;
#endif
ASSERT(cpu->cyp_state == CYS_REMOVING);
- ASSERT(nelems > 0);
cookie = be->cyb_set_level(bar, CY_HIGH_LEVEL);
CYC_TRACE1(cpu, CY_HIGH_LEVEL, "remove-xcall", ndx);
+ heap = cpu->cyp_heap;
+ nelems = cpu->cyp_nelems;
+ ASSERT(nelems > 0);
cyclic = &cpu->cyp_cyclics[ndx];
/*
@@ -1862,6 +1940,94 @@ cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait)
}
/*
+ * If cyclic_reprogram() is called on the same CPU as the cyclic's CPU, then
+ * it calls this function directly. Else, it invokes this function through
+ * an X-call to the cyclic's CPU.
+ */
+static void
+cyclic_reprogram_cyclic(cyc_cpu_t *cpu, cyc_index_t ndx, hrtime_t expire)
+{
+ cyc_backend_t *be = cpu->cyp_backend;
+ cyb_arg_t bar = be->cyb_arg;
+ cyc_cookie_t cookie;
+ cyc_index_t nelems, i;
+ cyc_index_t *heap;
+ cyclic_t *cyclic;
+ hrtime_t oexpire;
+ int reprog;
+
+ cookie = be->cyb_set_level(bar, CY_HIGH_LEVEL);
+
+ CYC_TRACE1(cpu, CY_HIGH_LEVEL, "reprog-xcall", ndx);
+
+ nelems = cpu->cyp_nelems;
+ ASSERT(nelems > 0);
+ heap = cpu->cyp_heap;
+
+ /*
+ * Reprogrammed cyclics are typically one-shot ones that get
+ * set to infinity on every expiration. We shorten the search by
+ * searching from the bottom of the heap to the top instead of the
+ * other way around.
+ */
+ for (i = nelems - 1; i >= 0; i--) {
+ if (heap[i] == ndx)
+ break;
+ }
+ if (i < 0)
+ panic("attempt to reprogram non-existent cyclic");
+
+ cyclic = &cpu->cyp_cyclics[ndx];
+ oexpire = cyclic->cy_expire;
+ cyclic->cy_expire = expire;
+
+ reprog = (i == 0);
+ if (expire > oexpire) {
+ CYC_TRACE1(cpu, CY_HIGH_LEVEL, "reprog-down", i);
+ cyclic_downheap(cpu, i);
+ } else if (i > 0) {
+ CYC_TRACE1(cpu, CY_HIGH_LEVEL, "reprog-up", i);
+ reprog = cyclic_upheap(cpu, i);
+ }
+
+ if (reprog && (cpu->cyp_state != CYS_SUSPENDED)) {
+ /*
+ * The root changed. Reprogram the clock source.
+ */
+ CYC_TRACE0(cpu, CY_HIGH_LEVEL, "reprog-root");
+ cyclic = &cpu->cyp_cyclics[heap[0]];
+ be->cyb_reprogram(bar, cyclic->cy_expire);
+ }
+
+ be->cyb_restore_level(bar, cookie);
+}
+
+static void
+cyclic_reprogram_xcall(cyc_xcallarg_t *arg)
+{
+ cyclic_reprogram_cyclic(arg->cyx_cpu, arg->cyx_ndx,
+ arg->cyx_when->cyt_when);
+}
+
+static void
+cyclic_reprogram_here(cyc_cpu_t *cpu, cyc_index_t ndx, hrtime_t expiration)
+{
+ cyc_backend_t *be = cpu->cyp_backend;
+ cyc_xcallarg_t arg;
+ cyc_time_t when;
+
+ ASSERT(expiration > 0);
+
+ arg.cyx_ndx = ndx;
+ arg.cyx_cpu = cpu;
+ arg.cyx_when = &when;
+ when.cyt_when = expiration;
+
+ be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu,
+ (cyc_func_t)cyclic_reprogram_xcall, &arg);
+}
+
+/*
* cyclic_juggle_one_to() should only be called when the source cyclic
* can be juggled and the destination CPU is known to be able to accept
* it.
@@ -1905,6 +2071,13 @@ cyclic_juggle_one_to(cyc_id_t *idp, cyc_cpu_t *dest)
}
/*
+ * Prevent a reprogram of this cyclic while we are relocating it.
+ * Otherwise, cyclic_reprogram_here() will end up sending an X-call
+ * to the wrong CPU.
+ */
+ rw_enter(&idp->cyi_lock, RW_WRITER);
+
+ /*
* Remove the cyclic from the source. As mentioned above, we cannot
* block during this operation; if we cannot remove the cyclic
* without waiting, we spin for a time shorter than the interval, and
@@ -1934,7 +2107,13 @@ cyclic_juggle_one_to(cyc_id_t *idp, cyc_cpu_t *dest)
if (delay > (cyclic->cy_interval >> 1))
delay = cyclic->cy_interval >> 1;
+ /*
+ * Drop the RW lock to avoid a deadlock with the cyclic
+ * handler (because it can potentially call cyclic_reprogram()).
+ */
+ rw_exit(&idp->cyi_lock);
drv_usecwait((clock_t)(delay / (NANOSEC / MICROSEC)));
+ rw_enter(&idp->cyi_lock, RW_WRITER);
}
/*
@@ -1945,6 +2124,12 @@ cyclic_juggle_one_to(cyc_id_t *idp, cyc_cpu_t *dest)
idp->cyi_ndx = cyclic_add_here(dest, &hdlr, &when, flags);
idp->cyi_cpu = dest;
kpreempt_enable();
+
+ /*
+ * Now that we have successfully relocated the cyclic, allow
+ * it to be reprogrammed.
+ */
+ rw_exit(&idp->cyi_lock);
}
static int
@@ -2326,6 +2511,8 @@ cyclic_omni_stop(cyc_id_t *idp, cyc_cpu_t *cpu)
{
cyc_omni_handler_t *omni = &idp->cyi_omni_hdlr;
cyc_omni_cpu_t *ocpu = idp->cyi_omni_list, *prev = NULL;
+ clock_t delay;
+ int ret;
CYC_PTRACE("omni-stop", cpu, idp);
ASSERT(MUTEX_HELD(&cpu_lock));
@@ -2333,6 +2520,13 @@ cyclic_omni_stop(cyc_id_t *idp, cyc_cpu_t *cpu)
ASSERT(idp->cyi_cpu == NULL);
ASSERT(ocpu != NULL);
+ /*
+ * Prevent a reprogram of this cyclic while we are removing it.
+ * Otherwise, cyclic_reprogram_here() will end up sending an X-call
+ * to the offlined CPU.
+ */
+ rw_enter(&idp->cyi_lock, RW_WRITER);
+
while (ocpu != NULL && ocpu->cyo_cpu != cpu) {
prev = ocpu;
ocpu = ocpu->cyo_next;
@@ -2351,7 +2545,51 @@ cyclic_omni_stop(cyc_id_t *idp, cyc_cpu_t *cpu)
prev->cyo_next = ocpu->cyo_next;
}
- (void) cyclic_remove_here(ocpu->cyo_cpu, ocpu->cyo_ndx, NULL, CY_WAIT);
+ /*
+ * Remove the cyclic from the source. We cannot block during this
+ * operation because we are holding the cyi_lock which can be held
+ * by the cyclic handler via cyclic_reprogram().
+ *
+ * If we cannot remove the cyclic without waiting, we spin for a time,
+ * and reattempt the (non-blocking) removal. If the handler is blocked
+ * on the cyi_lock, then we let go of it in the spin loop to give
+ * the handler a chance to run. Note that the removal will ultimately
+ * succeed -- even if the cyclic handler is blocked on a resource
+ * held by a thread which we have preempted, priority inheritance
+ * assures that the preempted thread will preempt us and continue
+ * to progress.
+ */
+ for (delay = 1; ; delay <<= 1) {
+ /*
+ * Before we begin this operation, disable kernel preemption.
+ */
+ kpreempt_disable();
+ ret = cyclic_remove_here(ocpu->cyo_cpu, ocpu->cyo_ndx, NULL,
+ CY_NOWAIT);
+ /*
+ * Enable kernel preemption while spinning.
+ */
+ kpreempt_enable();
+
+ if (ret)
+ break;
+
+ CYC_PTRACE("remove-omni-retry", idp, ocpu->cyo_cpu);
+
+ /*
+ * Drop the RW lock to avoid a deadlock with the cyclic
+ * handler (because it can potentially call cyclic_reprogram()).
+ */
+ rw_exit(&idp->cyi_lock);
+ drv_usecwait(delay);
+ rw_enter(&idp->cyi_lock, RW_WRITER);
+ }
+
+ /*
+ * Now that we have successfully removed the cyclic, allow the omni
+ * cyclic to be reprogrammed on other CPUs.
+ */
+ rw_exit(&idp->cyi_lock);
/*
* The cyclic has been removed from this CPU; time to call the
@@ -2381,6 +2619,7 @@ cyclic_new_id()
*/
idp->cyi_cpu = NULL;
idp->cyi_ndx = 0;
+ rw_init(&idp->cyi_lock, NULL, RW_DEFAULT, NULL);
idp->cyi_next = cyclic_id_head;
idp->cyi_prev = NULL;
@@ -2798,6 +3037,73 @@ cyclic_bind(cyclic_id_t id, cpu_t *d, cpupart_t *part)
cyclic_bind_cpupart(id, part);
}
+int
+cyclic_reprogram(cyclic_id_t id, hrtime_t expiration)
+{
+ cyc_id_t *idp = (cyc_id_t *)id;
+ cyc_cpu_t *cpu;
+ cyc_omni_cpu_t *ocpu;
+ cyc_index_t ndx;
+
+ ASSERT(expiration > 0);
+
+ CYC_PTRACE("reprog", idp, idp->cyi_cpu);
+
+ kpreempt_disable();
+
+ /*
+ * Prevent the cyclic from moving or disappearing while we reprogram.
+ */
+ rw_enter(&idp->cyi_lock, RW_READER);
+
+ if (idp->cyi_cpu == NULL) {
+ ASSERT(curthread->t_preempt > 0);
+ cpu = CPU->cpu_cyclic;
+
+ /*
+ * For an omni cyclic, we reprogram the cyclic corresponding
+ * to the current CPU. Look for it in the list.
+ */
+ ocpu = idp->cyi_omni_list;
+ while (ocpu != NULL) {
+ if (ocpu->cyo_cpu == cpu)
+ break;
+ ocpu = ocpu->cyo_next;
+ }
+
+ if (ocpu == NULL) {
+ /*
+ * Didn't find it. This means that CPU offline
+ * must have removed it racing with us. So,
+ * nothing to do.
+ */
+ rw_exit(&idp->cyi_lock);
+
+ kpreempt_enable();
+
+ return (0);
+ }
+ ndx = ocpu->cyo_ndx;
+ } else {
+ cpu = idp->cyi_cpu;
+ ndx = idp->cyi_ndx;
+ }
+
+ if (cpu->cyp_cpu == CPU)
+ cyclic_reprogram_cyclic(cpu, ndx, expiration);
+ else
+ cyclic_reprogram_here(cpu, ndx, expiration);
+
+ /*
+ * Allow the cyclic to be moved or removed.
+ */
+ rw_exit(&idp->cyi_lock);
+
+ kpreempt_enable();
+
+ return (1);
+}
+
hrtime_t
cyclic_getres()
{
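As the block comment and cyclic_reprogram() above make clear, reprogramming does not take cpu_lock, so the caller is responsible for ensuring that its cyclic is not removed while a reprogram may be in flight. A minimal sketch of one caller-side discipline follows (the foo_* names and lock are hypothetical, not part of this change): the stored ID and every reprogram are guarded by a client lock, and teardown clears the ID under that lock before calling cyclic_remove().

#include <sys/cyclic.h>
#include <sys/cpuvar.h>
#include <sys/mutex.h>

static kmutex_t foo_lock;			/* hypothetical client lock */
static cyclic_id_t foo_cyclic = CYCLIC_NONE;	/* hypothetical client state */

/*
 * Re-arm the client's cyclic.  The check and the reprogram both happen
 * under foo_lock, so a concurrent foo_fini() cannot remove the cyclic
 * while the reprogram is in flight.
 */
void
foo_rearm(hrtime_t when)
{
	mutex_enter(&foo_lock);
	if (foo_cyclic != CYCLIC_NONE)
		(void) cyclic_reprogram(foo_cyclic, when);
	mutex_exit(&foo_lock);
}

/*
 * Tear down the client's cyclic.  Clearing foo_cyclic under foo_lock
 * guarantees that no new reprogram can start and that any reprogram in
 * progress has completed by the time the lock is dropped.  The lock is
 * not held across cyclic_remove(), so a handler calling foo_rearm()
 * cannot deadlock against the removal.
 */
void
foo_fini(void)
{
	cyclic_id_t id;

	mutex_enter(&foo_lock);
	id = foo_cyclic;
	foo_cyclic = CYCLIC_NONE;
	mutex_exit(&foo_lock);

	if (id != CYCLIC_NONE) {
		mutex_enter(&cpu_lock);	/* cyclic_remove() requires cpu_lock */
		cyclic_remove(id);
		mutex_exit(&cpu_lock);
	}
}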