path: root/usr/src/uts/common/os/timers.c
Diffstat (limited to 'usr/src/uts/common/os/timers.c')
-rw-r--r--	usr/src/uts/common/os/timers.c	1354
1 file changed, 1354 insertions(+), 0 deletions(-)
diff --git a/usr/src/uts/common/os/timers.c b/usr/src/uts/common/os/timers.c
new file mode 100644
index 0000000000..f62b848320
--- /dev/null
+++ b/usr/src/uts/common/os/timers.c
@@ -0,0 +1,1354 @@
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Copyright (c) 1982, 1986 Regents of the University of California.
+ * All rights reserved. The Berkeley software License Agreement
+ * specifies the terms and conditions for redistribution.
+ */
+
+#include <sys/param.h>
+#include <sys/user.h>
+#include <sys/vnode.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/systm.h>
+#include <sys/kmem.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/timer.h>
+#include <sys/debug.h>
+#include <sys/sysmacros.h>
+#include <sys/cyclic.h>
+
+static void realitexpire(void *);
+static void realprofexpire(void *);
+static void timeval_advance(struct timeval *, struct timeval *);
+
+kmutex_t tod_lock; /* protects time-of-day stuff */
+
+/*
+ * Constant to define the minimum interval value of the ITIMER_REALPROF timer.
+ * Value is in microseconds; defaults to 500 usecs. Setting this value
+ * significantly lower may allow for denial-of-service attacks.
+ */
+int itimer_realprof_minimum = 500;
+
+/*
+ * macro to compare a timeval to a timestruc
+ */
+
+#define TVTSCMP(tvp, tsp, cmp) \
+ /* CSTYLED */ \
+ ((tvp)->tv_sec cmp (tsp)->tv_sec || \
+ ((tvp)->tv_sec == (tsp)->tv_sec && \
+ /* CSTYLED */ \
+ (tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec))
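+
+/*
+ * For example (hypothetical values): with a timeval of { 1 sec, 500 usec }
+ * and a timestruc of { 1 sec, 600000 nsec }, TVTSCMP(tvp, tsp, <) compares
+ * 500 * 1000 with 600000 and evaluates to true.
+ */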
+
+/*
+ * Time of day and interval timer support.
+ *
+ * These routines provide the kernel entry points to get and set
+ * the time-of-day and per-process interval timers. Subroutines
+ * here provide support for adding and subtracting timeval structures
+ * and decrementing interval timers, optionally reloading the interval
+ * timers when they expire.
+ */
+
+/*
+ * SunOS function to generate monotonically increasing time values.
+ */
+void
+uniqtime(struct timeval *tv)
+{
+ static struct timeval last;
+ timestruc_t ts;
+ time_t sec;
+ int usec, nsec;
+
+ /*
+ * protect modification of last
+ */
+ mutex_enter(&tod_lock);
+ gethrestime(&ts);
+
+ /*
+ * Fast algorithm to convert nsec to usec -- see hrt2ts()
+ * below for a full description.
+ */
+ nsec = ts.tv_nsec;
+ usec = nsec + (nsec >> 2);
+ usec = nsec + (usec >> 1);
+ usec = nsec + (usec >> 2);
+ usec = nsec + (usec >> 4);
+ usec = nsec - (usec >> 3);
+ usec = nsec + (usec >> 2);
+ usec = nsec + (usec >> 3);
+ usec = nsec + (usec >> 4);
+ usec = nsec + (usec >> 1);
+ usec = nsec + (usec >> 6);
+ usec = usec >> 10;
+ sec = ts.tv_sec;
+
+ /*
+ * Try to keep timestamps unique, but don't be obsessive about
+ * it in the face of large differences.
+ */
+ if ((sec <= last.tv_sec) && /* same or lower seconds, and */
+ ((sec != last.tv_sec) || /* either different second or */
+ (usec <= last.tv_usec)) && /* lower microsecond, and */
+ ((last.tv_sec - sec) <= 5)) { /* not way back in time */
+ sec = last.tv_sec;
+ usec = last.tv_usec + 1;
+ if (usec >= MICROSEC) {
+ usec -= MICROSEC;
+ sec++;
+ }
+ }
+ last.tv_sec = sec;
+ last.tv_usec = usec;
+ mutex_exit(&tod_lock);
+
+ tv->tv_sec = sec;
+ tv->tv_usec = usec;
+}
+
+/*
+ * Timestamps are exported from the kernel in several places.
+ * Such timestamps are commonly used for either uniqueness or for
+ * sequencing - truncation to 32-bits is fine for uniqueness,
+ * but sequencing is going to take more work as we get closer to 2038!
+ */
+void
+uniqtime32(struct timeval32 *tv32p)
+{
+ struct timeval tv;
+
+ uniqtime(&tv);
+ TIMEVAL_TO_TIMEVAL32(tv32p, &tv);
+}
+
+int
+gettimeofday(struct timeval *tp)
+{
+ struct timeval atv;
+
+ if (tp) {
+ uniqtime(&atv);
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyout(&atv, tp, sizeof (atv)))
+ return (set_errno(EFAULT));
+ } else {
+ struct timeval32 tv32;
+
+ if (TIMEVAL_OVERFLOW(&atv))
+ return (set_errno(EOVERFLOW));
+ TIMEVAL_TO_TIMEVAL32(&tv32, &atv);
+
+ if (copyout(&tv32, tp, sizeof (tv32)))
+ return (set_errno(EFAULT));
+ }
+ }
+ return (0);
+}
+
+int
+getitimer(uint_t which, struct itimerval *itv)
+{
+ int error;
+
+ if (get_udatamodel() == DATAMODEL_NATIVE)
+ error = xgetitimer(which, itv, 0);
+ else {
+ struct itimerval kitv;
+
+ if ((error = xgetitimer(which, &kitv, 1)) == 0) {
+ if (ITIMERVAL_OVERFLOW(&kitv)) {
+ error = EOVERFLOW;
+ } else {
+ struct itimerval32 itv32;
+
+ ITIMERVAL_TO_ITIMERVAL32(&itv32, &kitv);
+ if (copyout(&itv32, itv, sizeof (itv32)) != 0)
+ error = EFAULT;
+ }
+ }
+ }
+
+ return (error ? (set_errno(error)) : 0);
+}
+
+int
+xgetitimer(uint_t which, struct itimerval *itv, int iskaddr)
+{
+ struct proc *p = curproc;
+ struct timeval now;
+ struct itimerval aitv;
+ hrtime_t ts, first, interval, remain;
+
+ mutex_enter(&p->p_lock);
+
+ switch (which) {
+ case ITIMER_VIRTUAL:
+ case ITIMER_PROF:
+ aitv = ttolwp(curthread)->lwp_timer[which];
+ break;
+
+ case ITIMER_REAL:
+ uniqtime(&now);
+ aitv = p->p_realitimer;
+
+ if (timerisset(&aitv.it_value)) {
+ /*CSTYLED*/
+ if (timercmp(&aitv.it_value, &now, <)) {
+ timerclear(&aitv.it_value);
+ } else {
+ timevalsub(&aitv.it_value, &now);
+ }
+ }
+ break;
+
+ case ITIMER_REALPROF:
+ if (curproc->p_rprof_cyclic == CYCLIC_NONE) {
+ bzero(&aitv, sizeof (aitv));
+ break;
+ }
+
+ aitv = curproc->p_rprof_timer;
+
+ first = tv2hrt(&aitv.it_value);
+ interval = tv2hrt(&aitv.it_interval);
+
+ if ((ts = gethrtime()) < first) {
+ /*
+ * We haven't gone off for the first time; the time
+ * remaining is simply the first time we will go
+ * off minus the current time.
+ */
+ remain = first - ts;
+ } else {
+ if (interval == 0) {
+ /*
+ * This was set as a one-shot, and we've
+ * already gone off; there is no time
+ * remaining.
+ */
+ remain = 0;
+ } else {
+ /*
+ * We have a non-zero interval; we need to
+ * determine how far we are into the current
+ * interval, and subtract that from the
+ * interval to determine the time remaining.
+ */
+ remain = interval - ((ts - first) % interval);
+ }
+ }
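+
+ /*
+ * For example (illustrative numbers): with first = 10s, interval = 3s
+ * and ts = 17s, we are (17 - 10) % 3 = 1s into the current interval,
+ * so 2s remain until the next expiration.
+ */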
+
+ hrt2tv(remain, &aitv.it_value);
+ break;
+
+ default:
+ mutex_exit(&p->p_lock);
+ return (EINVAL);
+ }
+
+ mutex_exit(&p->p_lock);
+
+ if (iskaddr) {
+ bcopy(&aitv, itv, sizeof (*itv));
+ } else {
+ ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
+ if (copyout(&aitv, itv, sizeof (*itv)))
+ return (EFAULT);
+ }
+
+ return (0);
+}
+
+
+int
+setitimer(uint_t which, struct itimerval *itv, struct itimerval *oitv)
+{
+ int error;
+
+ if (oitv != NULL)
+ if ((error = getitimer(which, oitv)) != 0)
+ return (error);
+
+ if (itv == NULL)
+ return (0);
+
+ if (get_udatamodel() == DATAMODEL_NATIVE)
+ error = xsetitimer(which, itv, 0);
+ else {
+ struct itimerval32 itv32;
+ struct itimerval kitv;
+
+ if (copyin(itv, &itv32, sizeof (itv32)))
+ return (set_errno(EFAULT));
+ ITIMERVAL32_TO_ITIMERVAL(&kitv, &itv32);
+ error = xsetitimer(which, &kitv, 1);
+ }
+
+ return (error ? (set_errno(error)) : 0);
+}
+
+int
+xsetitimer(uint_t which, struct itimerval *itv, int iskaddr)
+{
+ struct itimerval aitv;
+ struct timeval now;
+ struct proc *p = curproc;
+ kthread_t *t;
+ timeout_id_t tmp_id;
+ cyc_handler_t hdlr;
+ cyc_time_t when;
+ cyclic_id_t cyclic;
+ hrtime_t ts;
+ int min;
+
+ if (itv == NULL)
+ return (0);
+
+ if (iskaddr) {
+ bcopy(itv, &aitv, sizeof (aitv));
+ } else {
+ ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
+ if (copyin(itv, &aitv, sizeof (aitv)))
+ return (EFAULT);
+ }
+
+ if (which == ITIMER_REALPROF) {
+ min = MAX((int)(cyclic_getres() / (NANOSEC / MICROSEC)),
+ itimer_realprof_minimum);
+ } else {
+ min = usec_per_tick;
+ }
+
+ if (itimerfix(&aitv.it_value, min) ||
+ (itimerfix(&aitv.it_interval, min) && timerisset(&aitv.it_value)))
+ return (EINVAL);
+
+ mutex_enter(&p->p_lock);
+ switch (which) {
+ case ITIMER_REAL:
+ /*
+ * The SITBUSY flag prevents conflicts with multiple
+ * threads attempting to perform setitimer(ITIMER_REAL)
+ * at the same time, even when we drop p->p_lock below.
+ * Any blocked thread returns successfully because the
+ * effect is the same as if it got here first, finished,
+ * and the other thread then came through and destroyed
+ * what it did. We are just protecting the system from
+ * malfunctioning due to the race condition.
+ */
+ if (p->p_flag & SITBUSY) {
+ mutex_exit(&p->p_lock);
+ return (0);
+ }
+ p->p_flag |= SITBUSY;
+ while ((tmp_id = p->p_itimerid) != 0) {
+ /*
+ * Avoid deadlock in callout_delete (called from
+ * untimeout) which may go to sleep (while holding
+ * p_lock). Drop p_lock and re-acquire it after
+ * untimeout returns. Need to clear p_itimerid
+ * while holding p_lock.
+ */
+ p->p_itimerid = 0;
+ mutex_exit(&p->p_lock);
+ (void) untimeout(tmp_id);
+ mutex_enter(&p->p_lock);
+ }
+ if (timerisset(&aitv.it_value)) {
+ uniqtime(&now);
+ timevaladd(&aitv.it_value, &now);
+ p->p_itimerid = realtime_timeout(realitexpire,
+ p, hzto(&aitv.it_value));
+ }
+ p->p_realitimer = aitv;
+ p->p_flag &= ~SITBUSY;
+ break;
+
+ case ITIMER_REALPROF:
+ cyclic = p->p_rprof_cyclic;
+ p->p_rprof_cyclic = CYCLIC_NONE;
+
+ mutex_exit(&p->p_lock);
+
+ /*
+ * We're now going to acquire cpu_lock, remove the old cyclic
+ * if necessary, and add our new cyclic.
+ */
+ mutex_enter(&cpu_lock);
+
+ if (cyclic != CYCLIC_NONE)
+ cyclic_remove(cyclic);
+
+ if (!timerisset(&aitv.it_value)) {
+ /*
+ * If we were passed a value of 0, we're done.
+ */
+ mutex_exit(&cpu_lock);
+ return (0);
+ }
+
+ hdlr.cyh_func = realprofexpire;
+ hdlr.cyh_arg = p;
+ hdlr.cyh_level = CY_LOW_LEVEL;
+
+ when.cyt_when = (ts = gethrtime() + tv2hrt(&aitv.it_value));
+ when.cyt_interval = tv2hrt(&aitv.it_interval);
+
+ if (when.cyt_interval == 0) {
+ /*
+ * Using the same logic as for CLOCK_HIGHRES timers, we
+ * set the interval to be INT64_MAX - when.cyt_when to
+ * effect a one-shot; see the comment in clock_highres.c
+ * for more details on why this works.
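+ * (The first expiry fires at cyt_when; the "next" one would land
+ * at cyt_when + (INT64_MAX - cyt_when) = INT64_MAX, i.e.
+ * effectively never.)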
+ */
+ when.cyt_interval = INT64_MAX - when.cyt_when;
+ }
+
+ cyclic = cyclic_add(&hdlr, &when);
+
+ mutex_exit(&cpu_lock);
+
+ /*
+ * We have now successfully added the cyclic. Reacquire
+ * p_lock, and see if anyone has snuck in.
+ */
+ mutex_enter(&p->p_lock);
+
+ if (p->p_rprof_cyclic != CYCLIC_NONE) {
+ /*
+ * We're racing with another thread establishing an
+ * ITIMER_REALPROF interval timer. We'll let the other
+ * thread win (this is a race at the application level,
+ * so letting the other thread win is acceptable).
+ */
+ mutex_exit(&p->p_lock);
+ mutex_enter(&cpu_lock);
+ cyclic_remove(cyclic);
+ mutex_exit(&cpu_lock);
+
+ return (0);
+ }
+
+ /*
+ * Success. Set our tracking variables in the proc structure,
+ * cancel any outstanding ITIMER_PROF, and allocate the
+ * per-thread SIGPROF buffers, if possible.
+ */
+ hrt2tv(ts, &aitv.it_value);
+ p->p_rprof_timer = aitv;
+ p->p_rprof_cyclic = cyclic;
+
+ t = p->p_tlist;
+ do {
+ struct itimerval *itvp;
+
+ itvp = &ttolwp(t)->lwp_timer[ITIMER_PROF];
+ timerclear(&itvp->it_interval);
+ timerclear(&itvp->it_value);
+
+ if (t->t_rprof != NULL)
+ continue;
+
+ t->t_rprof =
+ kmem_zalloc(sizeof (struct rprof), KM_NOSLEEP);
+ aston(t);
+ } while ((t = t->t_forw) != p->p_tlist);
+
+ break;
+
+ case ITIMER_VIRTUAL:
+ ttolwp(curthread)->lwp_timer[ITIMER_VIRTUAL] = aitv;
+ break;
+
+ case ITIMER_PROF:
+ if (p->p_rprof_cyclic != CYCLIC_NONE) {
+ /*
+ * Silently ignore ITIMER_PROF if ITIMER_REALPROF
+ * is in effect.
+ */
+ break;
+ }
+
+ ttolwp(curthread)->lwp_timer[ITIMER_PROF] = aitv;
+ break;
+
+ default:
+ mutex_exit(&p->p_lock);
+ return (EINVAL);
+ }
+ mutex_exit(&p->p_lock);
+ return (0);
+}
+
+/*
+ * Real interval timer expired:
+ * send process whose timer expired an alarm signal.
+ * If time is not set up to reload, then just return.
+ * Else compute next time timer should go off which is > current time.
+ * This is where delay in processing this timeout causes multiple
+ * SIGALRM calls to be compressed into one.
+ */
+static void
+realitexpire(void *arg)
+{
+ struct proc *p = arg;
+ struct timeval *valp = &p->p_realitimer.it_value;
+ struct timeval *intervalp = &p->p_realitimer.it_interval;
+#if !defined(_LP64)
+ clock_t ticks;
+#endif
+
+ mutex_enter(&p->p_lock);
+#if !defined(_LP64)
+ if ((ticks = hzto(valp)) > 1) {
+ /*
+ * If we are executing before we were meant to, it must be
+ * because of an overflow in a prior hzto() calculation.
+ * In this case, we want to go to sleep for the recalculated
+ * number of ticks. For the special meaning of the value "1"
+ * see comment in timespectohz().
+ */
+ p->p_itimerid = realtime_timeout(realitexpire, p, ticks);
+ mutex_exit(&p->p_lock);
+ return;
+ }
+#endif
+ sigtoproc(p, NULL, SIGALRM);
+ if (!timerisset(intervalp)) {
+ timerclear(valp);
+ p->p_itimerid = 0;
+ } else {
+ /* advance timer value past current time */
+ timeval_advance(valp, intervalp);
+ p->p_itimerid = realtime_timeout(realitexpire, p, hzto(valp));
+ }
+ mutex_exit(&p->p_lock);
+}
+
+/*
+ * Real time profiling interval timer expired:
+ * Increment microstate counters for each lwp in the process
+ * and ensure that running lwps are kicked into the kernel.
+ * If time is not set up to reload, then just return.
+ * Else compute next time timer should go off which is > current time,
+ * as above.
+ */
+static void
+realprofexpire(void *arg)
+{
+ struct proc *p = arg;
+ kthread_t *t;
+
+ mutex_enter(&p->p_lock);
+ if ((t = p->p_tlist) == NULL) {
+ mutex_exit(&p->p_lock);
+ return;
+ }
+ do {
+ int mstate;
+
+ /*
+ * Attempt to allocate the SIGPROF buffer, but don't sleep.
+ */
+ if (t->t_rprof == NULL)
+ t->t_rprof = kmem_zalloc(sizeof (struct rprof),
+ KM_NOSLEEP);
+ if (t->t_rprof == NULL)
+ continue;
+
+ thread_lock(t);
+ switch (t->t_state) {
+ case TS_SLEEP:
+ /*
+ * Don't touch the lwp if it is swapped out.
+ */
+ if (!(t->t_schedflag & TS_LOAD)) {
+ mstate = LMS_SLEEP;
+ break;
+ }
+ switch (mstate = ttolwp(t)->lwp_mstate.ms_prev) {
+ case LMS_TFAULT:
+ case LMS_DFAULT:
+ case LMS_KFAULT:
+ case LMS_USER_LOCK:
+ break;
+ default:
+ mstate = LMS_SLEEP;
+ break;
+ }
+ break;
+ case TS_RUN:
+ mstate = LMS_WAIT_CPU;
+ break;
+ case TS_ONPROC:
+ switch (mstate = t->t_mstate) {
+ case LMS_USER:
+ case LMS_SYSTEM:
+ case LMS_TRAP:
+ break;
+ default:
+ mstate = LMS_SYSTEM;
+ break;
+ }
+ break;
+ default:
+ mstate = t->t_mstate;
+ break;
+ }
+ t->t_rprof->rp_anystate = 1;
+ t->t_rprof->rp_state[mstate]++;
+ aston(t);
+ /*
+ * force the thread into the kernel
+ * if it is not already there.
+ */
+ if (t->t_state == TS_ONPROC && t->t_cpu != CPU)
+ poke_cpu(t->t_cpu->cpu_id);
+ thread_unlock(t);
+ } while ((t = t->t_forw) != p->p_tlist);
+
+ mutex_exit(&p->p_lock);
+}
+
+/*
+ * Advances timer value past the current time of day. See the detailed
+ * comment for this logic in realitexpire(), above.
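+ *
+ * For example (illustrative numbers): advancing valp = 0 past a
+ * current time of 10.5s with a 1s interval, the inner loop adds 1, 2,
+ * 4 and 8 seconds (valp = 15), overshoots, and backs off by 8
+ * (valp = 7); the next pass adds 1, 2 and 4, then backs off by 4
+ * (valp = 10); the final pass adds 1 and stops at valp = 11, the
+ * first point strictly past the current time, in O(log n) additions.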
+ */
+static void
+timeval_advance(struct timeval *valp, struct timeval *intervalp)
+{
+ int cnt2nth;
+ struct timeval interval2nth;
+
+ for (;;) {
+ interval2nth = *intervalp;
+ for (cnt2nth = 0; ; cnt2nth++) {
+ timevaladd(valp, &interval2nth);
+ /*CSTYLED*/
+ if (TVTSCMP(valp, &hrestime, >))
+ break;
+ timevaladd(&interval2nth, &interval2nth);
+ }
+ if (cnt2nth == 0)
+ break;
+ timevalsub(valp, &interval2nth);
+ }
+}
+
+/*
+ * Check that a proposed value to load into the .it_value or .it_interval
+ * part of an interval timer is acceptable, and set it to at least a
+ * specified minimal value.
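+ *
+ * For example, a request of 100 usec for ITIMER_REALPROF is silently
+ * raised to the 500 usec default of itimer_realprof_minimum (assuming
+ * the cyclic resolution does not exceed it).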
+ */
+int
+itimerfix(struct timeval *tv, int minimum)
+{
+ if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
+ tv->tv_usec < 0 || tv->tv_usec >= MICROSEC)
+ return (EINVAL);
+ if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < minimum)
+ tv->tv_usec = minimum;
+ return (0);
+}
+
+/*
+ * Same as itimerfix, except a) it takes a timespec instead of a timeval and
+ * b) it doesn't truncate based on timeout granularity; consumers of this
+ * interface (e.g. timer_settime()) depend on the passed timespec not being
+ * modified implicitly.
+ */
+int
+itimerspecfix(timespec_t *tv)
+{
+ if (tv->tv_sec < 0 || tv->tv_nsec < 0 || tv->tv_nsec >= NANOSEC)
+ return (EINVAL);
+ return (0);
+}
+
+/*
+ * Decrement an interval timer by a specified number
+ * of microseconds, which must be less than a second,
+ * i.e. < 1000000. If the timer expires, then reload
+ * it. In this case, carry over (usec - old value) to
+ * reduce the value reloaded into the timer so that
+ * the timer does not drift. This routine assumes
+ * that it is called in a context where the timers
+ * on which it is operating cannot change in value.
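+ *
+ * For example (illustrative numbers): with it_value = 300 usec,
+ * it_interval = 1000 usec and a decrement of 500 usec, the timer
+ * expires 200 usec late and is reloaded with 1000 - 200 = 800 usec,
+ * keeping subsequent expirations on the original schedule.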
+ */
+int
+itimerdecr(struct itimerval *itp, int usec)
+{
+ if (itp->it_value.tv_usec < usec) {
+ if (itp->it_value.tv_sec == 0) {
+ /* expired, and already in next interval */
+ usec -= itp->it_value.tv_usec;
+ goto expire;
+ }
+ itp->it_value.tv_usec += MICROSEC;
+ itp->it_value.tv_sec--;
+ }
+ itp->it_value.tv_usec -= usec;
+ usec = 0;
+ if (timerisset(&itp->it_value))
+ return (1);
+ /* expired, exactly at end of interval */
+expire:
+ if (timerisset(&itp->it_interval)) {
+ itp->it_value = itp->it_interval;
+ itp->it_value.tv_usec -= usec;
+ if (itp->it_value.tv_usec < 0) {
+ itp->it_value.tv_usec += MICROSEC;
+ itp->it_value.tv_sec--;
+ }
+ } else
+ itp->it_value.tv_usec = 0; /* sec is already 0 */
+ return (0);
+}
+
+/*
+ * Add and subtract routines for timevals.
+ * N.B.: subtract routine doesn't deal with
+ * results which are before the beginning,
+ * it just gets very confused in this case.
+ * Caveat emptor.
+ */
+void
+timevaladd(struct timeval *t1, struct timeval *t2)
+{
+ t1->tv_sec += t2->tv_sec;
+ t1->tv_usec += t2->tv_usec;
+ timevalfix(t1);
+}
+
+void
+timevalsub(struct timeval *t1, struct timeval *t2)
+{
+ t1->tv_sec -= t2->tv_sec;
+ t1->tv_usec -= t2->tv_usec;
+ timevalfix(t1);
+}
+
+void
+timevalfix(struct timeval *t1)
+{
+ if (t1->tv_usec < 0) {
+ t1->tv_sec--;
+ t1->tv_usec += MICROSEC;
+ }
+ if (t1->tv_usec >= MICROSEC) {
+ t1->tv_sec++;
+ t1->tv_usec -= MICROSEC;
+ }
+}
+
+/*
+ * Same as the routines above. These routines take a timespec instead
+ * of a timeval.
+ */
+void
+timespecadd(timespec_t *t1, timespec_t *t2)
+{
+ t1->tv_sec += t2->tv_sec;
+ t1->tv_nsec += t2->tv_nsec;
+ timespecfix(t1);
+}
+
+void
+timespecsub(timespec_t *t1, timespec_t *t2)
+{
+ t1->tv_sec -= t2->tv_sec;
+ t1->tv_nsec -= t2->tv_nsec;
+ timespecfix(t1);
+}
+
+void
+timespecfix(timespec_t *t1)
+{
+ if (t1->tv_nsec < 0) {
+ t1->tv_sec--;
+ t1->tv_nsec += NANOSEC;
+ } else {
+ if (t1->tv_nsec >= NANOSEC) {
+ t1->tv_sec++;
+ t1->tv_nsec -= NANOSEC;
+ }
+ }
+}
+
+/*
+ * Compute number of hz until specified time.
+ * Used to compute third argument to timeout() from an absolute time.
+ */
+clock_t
+hzto(struct timeval *tv)
+{
+ timespec_t ts, now;
+
+ ts.tv_sec = tv->tv_sec;
+ ts.tv_nsec = tv->tv_usec * 1000;
+ gethrestime_lasttick(&now);
+
+ return (timespectohz(&ts, now));
+}
+
+/*
+ * Compute number of hz until specified time for a given timespec value.
+ * Used to compute third argument to timeout() from an absolute time.
+ */
+clock_t
+timespectohz(timespec_t *tv, timespec_t now)
+{
+ clock_t ticks;
+ time_t sec;
+ int nsec;
+
+ /*
+ * Compute number of ticks we will see between now and
+ * the target time; returns "1" if the destination time
+ * is before the next tick, so we always get some delay,
+ * and returns LONG_MAX ticks if we would overflow.
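+ *
+ * For example (illustrative, hz = 100 so nsec_per_tick is 10ms): a
+ * target 25ms in the future gives sec = 0 and nsec = 25000000 +
+ * 10000000 - 1, so NSEC_TO_TICK() below yields 3 ticks -- the
+ * smallest count covering the full 25ms delay.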
+ */
+ sec = tv->tv_sec - now.tv_sec;
+ nsec = tv->tv_nsec - now.tv_nsec + nsec_per_tick - 1;
+
+ if (nsec < 0) {
+ sec--;
+ nsec += NANOSEC;
+ } else if (nsec >= NANOSEC) {
+ sec++;
+ nsec -= NANOSEC;
+ }
+
+ ticks = NSEC_TO_TICK(nsec);
+
+ /*
+ * Compute ticks, accounting for negative and overflow as above.
+ * Overflow protection kicks in at about 70 weeks for hz=50
+ * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
+ * kernel :-)
+ */
+ if (sec < 0 || (sec == 0 && ticks < 1))
+ ticks = 1; /* protect vs nonpositive */
+ else if (sec > (LONG_MAX - ticks) / hz)
+ ticks = LONG_MAX; /* protect vs overflow */
+ else
+ ticks += sec * hz; /* common case */
+
+ return (ticks);
+}
+
+/*
+ * Same as timespectohz() except that we adjust the clock ticks down a bit.
+ * If we will be waiting for a long time, we may encounter skewing problems
+ * due to adjtime() system calls. Since we can skew up to 1/16 lbolt rate
+ * if adjtime is going crazy, we reduce the time delta since timeout() takes
+ * clock ticks rather than wallclock elapsed time. This may cause the caller
+ * (who calls timeout()) to return with a timeout prematurely and callers
+ * must accommodate this. See lwp_timeout(), queue_lwptimer() and
+ * cv_waituntil_sig(), currently the only callers of this function.
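+ *
+ * For example (illustrative): a 16-second wait is shortened by
+ * 16 >> 4 = 1 second to roughly 15 seconds' worth of ticks, so the
+ * caller may wake up early and must re-check the current time.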
+ */
+clock_t
+timespectohz_adj(timespec_t *tv, timespec_t now)
+{
+ timespec_t wait_time = *tv;
+
+ timespecsub(&wait_time, &now);
+ wait_time.tv_sec -= wait_time.tv_sec >> 4;
+ wait_time.tv_nsec -= wait_time.tv_nsec >> 4;
+ timespecadd(&wait_time, &now);
+ return (timespectohz(&wait_time, now));
+}
+
+/*
+ * hrt2ts(): convert from hrtime_t to timestruc_t.
+ *
+ * All this routine really does is:
+ *
+ * tsp->sec = hrt / NANOSEC;
+ * tsp->nsec = hrt % NANOSEC;
+ *
+ * The black magic below avoids doing a 64-bit by 32-bit integer divide,
+ * which is quite expensive. There's actually much more going on here than
+ * it might first appear -- don't try this at home.
+ *
+ * For the adventuresome, here's an explanation of how it works.
+ *
+ * Multiplication by a fixed constant is easy -- you just do the appropriate
+ * shifts and adds. For example, to multiply by 10, we observe that
+ *
+ * x * 10 = x * (8 + 2)
+ * = (x * 8) + (x * 2)
+ * = (x << 3) + (x << 1).
+ *
+ * In general, you can read the algorithm right off the bits: the number 10
+ * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3).
+ *
+ * Sometimes you can do better. For example, 15 is 1111 binary, so the normal
+ * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3).
+ * But, it's cheaper if you capitalize on the fact that you have a run of ones:
+ * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0). [You would never
+ * actually perform the operation << 0, since it's a no-op; I'm just writing
+ * it that way for clarity.]
+ *
+ * The other way you can win is if you get lucky with the prime factorization
+ * of your constant. The number 1,000,000,000, which we have to multiply
+ * by below, is a good example. One billion is 111011100110101100101000000000
+ * in binary. If you apply the bit-grouping trick, it doesn't buy you very
+ * much, because it's only a win for groups of three or more equal bits:
+ *
+ * 111011100110101100101000000000 = 1000000000000000000000000000000
+ * - 000100011001010011011000000000
+ *
+ * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS,
+ * we have reduced this to 10 shift/add pairs (20 operations) on the RHS.
+ * This is better, but not great.
+ *
+ * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125,
+ * and multiply by each factor. Multiplication by 125 is particularly easy,
+ * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four
+ * operations. So, to multiply by 1,000,000,000, we perform three multipli-
+ * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations.
+ * This is the algorithm we actually use in both hrt2ts() and ts2hrt().
+ *
+ * Division is harder; there is no equivalent of the simple shift-add algorithm
+ * we used for multiplication. However, we can convert the division problem
+ * into a multiplication problem by pre-computing the binary representation
+ * of the reciprocal of the divisor. For the case of interest, we have
+ *
+ * 1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30,
+ *
+ * to 32 bits of precision. (The notation B-30 means "* 2^-30", just like
+ * E-18 means "* 10^-18".)
+ *
+ * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit
+ * integer 10001001011100000101111101000001, then normalize (shift) the
+ * result. This constant has several large bits runs, so the multiply
+ * is relatively cheap:
+ *
+ * 10001001011100000101111101000001 = 10001001100000000110000001000001
+ * - 00000000000100000000000100000000
+ *
+ * Again, you can just read the algorithm right off the bits:
+ *
+ * sec = hrt;
+ * sec += (hrt << 6);
+ * sec -= (hrt << 8);
+ * sec += (hrt << 13);
+ * sec += (hrt << 14);
+ * sec -= (hrt << 20);
+ * sec += (hrt << 23);
+ * sec += (hrt << 24);
+ * sec += (hrt << 27);
+ * sec += (hrt << 31);
+ * sec >>= (32 + 30);
+ *
+ * Voila! The only problem is, since hrt is 64 bits, we need to use 96-bit
+ * arithmetic to perform this calculation. That's a waste, because ultimately
+ * we only need the highest 32 bits of the result.
+ *
+ * The first thing we do is to realize that we don't need to use all of hrt
+ * in the calculation. The lowest 30 bits can contribute at most 1 to the
+ * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later.
+ * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t.
+ * Thus, the only bits of hrt that matter for division are bits 30..61.
+ * These 32 bits are just the lower-order word of (hrt >> 30). This brings
+ * us down from 96-bit math to 64-bit math, and our algorithm becomes:
+ *
+ * tmp = (uint32_t) (hrt >> 30);
+ * sec = tmp;
+ * sec += (tmp << 6);
+ * sec -= (tmp << 8);
+ * sec += (tmp << 13);
+ * sec += (tmp << 14);
+ * sec -= (tmp << 20);
+ * sec += (tmp << 23);
+ * sec += (tmp << 24);
+ * sec += (tmp << 27);
+ * sec += (tmp << 31);
+ * sec >>= 32;
+ *
+ * Next, we're going to reduce this 64-bit computation to a 32-bit
+ * computation. We begin by rewriting the above algorithm to use relative
+ * shifts instead of absolute shifts. That is, instead of computing
+ * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally:
+ * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc:
+ *
+ * tmp = (uint32_t) (hrt >> 30);
+ * sec = tmp;
+ * tmp <<= 6; sec += tmp;
+ * tmp <<= 2; sec -= tmp;
+ * tmp <<= 5; sec += tmp;
+ * tmp <<= 1; sec += tmp;
+ * tmp <<= 6; sec -= tmp;
+ * tmp <<= 3; sec += tmp;
+ * tmp <<= 1; sec += tmp;
+ * tmp <<= 3; sec += tmp;
+ * tmp <<= 4; sec += tmp;
+ * sec >>= 32;
+ *
+ * Now for the final step. Instead of throwing away the low 32 bits at
+ * the end, we can throw them away as we go, only keeping the high 32 bits
+ * of the product at each step. So, for example, where we now have
+ *
+ * tmp <<= 6; sec = sec + tmp;
+ * we will instead have
+ * tmp <<= 6; sec = (sec + tmp) >> 6;
+ * which is equivalent to
+ * sec = (sec >> 6) + tmp;
+ *
+ * The final shift ("sec >>= 32") goes away.
+ *
+ * All we're really doing here is long multiplication, just like we learned in
+ * grade school, except that at each step, we only look at the leftmost 32
+ * columns. The cumulative error is, at most, the sum of all the bits we
+ * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32.
+ * Thus, the final result ("sec") is correct to +/- 1.
+ *
+ * It turns out to be important to keep "sec" positive at each step, because
+ * we don't want to have to explicitly extend the sign bit. Therefore,
+ * starting with the last line of code above, each line that would have read
+ * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and
+ * the operators (+ or -) in all previous lines must be toggled accordingly.
+ * Thus, we end up with:
+ *
+ * tmp = (uint32_t) (hrt >> 30);
+ * sec = tmp + (sec >> 6);
+ * sec = tmp - (sec >> 2);
+ * sec = tmp - (sec >> 5);
+ * sec = tmp + (sec >> 1);
+ * sec = tmp - (sec >> 6);
+ * sec = tmp - (sec >> 3);
+ * sec = tmp + (sec >> 1);
+ * sec = tmp + (sec >> 3);
+ * sec = tmp + (sec >> 4);
+ *
+ * This yields a value for sec that is accurate to +1/-1, so we have two
+ * cases to deal with. The mysterious-looking "+ 7" in the code below biases
+ * the rounding toward zero, so that sec is always less than or equal to
+ * the correct value. With this modified code, sec is accurate to +0/-2, with
+ * the -2 case being very rare in practice. With this change, we only have to
+ * deal with one case (sec too small) in the cleanup code.
+ *
+ * The other modification we make is to delete the second line above
+ * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is
+ * set, and the cleanup code can handle that rare case. This reduces the
+ * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases.
+ *
+ * Finally, we compute nsec = hrt - (sec * 1,000,000,000). nsec will always
+ * be positive (since sec is never too large), and will at most be equal to
+ * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt.
+ * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can
+ * safely assume that nsec fits in 32 bits. Consequently, when we compute
+ * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit
+ * arithmetic and let the high-order bits fall off the end.
+ *
+ * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop:
+ *
+ * while (nsec >= NANOSEC) {
+ * nsec -= NANOSEC;
+ * sec++;
+ * }
+ *
+ * is guaranteed to complete in at most 4 iterations. In practice, the loop
+ * completes in 0 or 1 iteration over 95% of the time.
+ *
+ * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about
+ * 35 usec for software division -- about 20 times faster.
+ */
+void
+hrt2ts(hrtime_t hrt, timestruc_t *tsp)
+{
+ uint32_t sec, nsec, tmp;
+
+ tmp = (uint32_t)(hrt >> 30);
+ sec = tmp - (tmp >> 2);
+ sec = tmp - (sec >> 5);
+ sec = tmp + (sec >> 1);
+ sec = tmp - (sec >> 6) + 7;
+ sec = tmp - (sec >> 3);
+ sec = tmp + (sec >> 1);
+ sec = tmp + (sec >> 3);
+ sec = tmp + (sec >> 4);
+ tmp = (sec << 7) - sec - sec - sec;
+ tmp = (tmp << 7) - tmp - tmp - tmp;
+ tmp = (tmp << 7) - tmp - tmp - tmp;
+ nsec = (uint32_t)hrt - (tmp << 9);
+ while (nsec >= NANOSEC) {
+ nsec -= NANOSEC;
+ sec++;
+ }
+ tsp->tv_sec = (time_t)sec;
+ tsp->tv_nsec = nsec;
+}
+
+/*
+ * Convert from timestruc_t to hrtime_t.
+ *
+ * The code below is equivalent to:
+ *
+ * hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec;
+ *
+ * but requires no integer multiply.
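+ *
+ * Each (hrt << 7) - hrt - hrt - hrt step below computes 128x - 3x =
+ * 125x; three such steps multiply tv_sec by 125^3 = 1,953,125, and
+ * the final << 9 multiplies by 512: 1,953,125 * 512 = 1,000,000,000.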
+ */
+hrtime_t
+ts2hrt(const timestruc_t *tsp)
+{
+ hrtime_t hrt;
+
+ hrt = tsp->tv_sec;
+ hrt = (hrt << 7) - hrt - hrt - hrt;
+ hrt = (hrt << 7) - hrt - hrt - hrt;
+ hrt = (hrt << 7) - hrt - hrt - hrt;
+ hrt = (hrt << 9) + tsp->tv_nsec;
+ return (hrt);
+}
+
+/*
+ * For the various 32-bit "compatibility" paths in the system.
+ */
+void
+hrt2ts32(hrtime_t hrt, timestruc32_t *ts32p)
+{
+ timestruc_t ts;
+
+ hrt2ts(hrt, &ts);
+ TIMESPEC_TO_TIMESPEC32(ts32p, &ts);
+}
+
+/*
+ * If this ever becomes performance critical (ha!), we can borrow the
+ * code from ts2hrt(), above, to multiply tv_sec by 1,000,000 and the
+ * straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec by
+ * 1,000. For now, we'll opt for readability (besides, the compiler does
+ * a passable job of optimizing constant multiplication into shifts and adds).
+ */
+hrtime_t
+tv2hrt(struct timeval *tvp)
+{
+ return ((hrtime_t)tvp->tv_sec * NANOSEC +
+ (hrtime_t)tvp->tv_usec * (NANOSEC / MICROSEC));
+}
+
+void
+hrt2tv(hrtime_t ts, struct timeval *tvp)
+{
+ tvp->tv_sec = ts / NANOSEC;
+ tvp->tv_usec = (ts % NANOSEC) / (NANOSEC / MICROSEC);
+}
+
+int
+nanosleep(timespec_t *rqtp, timespec_t *rmtp)
+{
+ timespec_t rqtime;
+ timespec_t rmtime;
+ timespec_t now;
+ int timecheck;
+ int ret = 1;
+ model_t datamodel = get_udatamodel();
+
+ if (datamodel == DATAMODEL_NATIVE) {
+ if (copyin(rqtp, &rqtime, sizeof (rqtime)))
+ return (set_errno(EFAULT));
+ } else {
+ timespec32_t rqtime32;
+
+ if (copyin(rqtp, &rqtime32, sizeof (rqtime32)))
+ return (set_errno(EFAULT));
+ TIMESPEC32_TO_TIMESPEC(&rqtime, &rqtime32);
+ }
+
+ if (rqtime.tv_sec < 0 || rqtime.tv_nsec < 0 ||
+ rqtime.tv_nsec >= NANOSEC)
+ return (set_errno(EINVAL));
+
+ if (timerspecisset(&rqtime)) {
+ timecheck = timechanged;
+ gethrestime(&now);
+ timespecadd(&rqtime, &now);
+ mutex_enter(&curthread->t_delay_lock);
+ while ((ret = cv_waituntil_sig(&curthread->t_delay_cv,
+ &curthread->t_delay_lock, &rqtime, timecheck)) > 0)
+ continue;
+ mutex_exit(&curthread->t_delay_lock);
+ }
+
+ if (rmtp) {
+ /*
+ * If cv_waituntil_sig() returned due to a signal, and
+ * there is time remaining, then set the time remaining.
+ * Else set the time remaining to zero.
+ */
+ rmtime.tv_sec = rmtime.tv_nsec = 0;
+ if (ret == 0) {
+ gethrestime(&now);
+ if ((now.tv_sec < rqtime.tv_sec) ||
+ ((now.tv_sec == rqtime.tv_sec) &&
+ (now.tv_nsec < rqtime.tv_nsec))) {
+ rmtime = rqtime;
+ timespecsub(&rmtime, &now);
+ }
+ }
+
+ if (datamodel == DATAMODEL_NATIVE) {
+ if (copyout(&rmtime, rmtp, sizeof (rmtime)))
+ return (set_errno(EFAULT));
+ } else {
+ timespec32_t rmtime32;
+
+ TIMESPEC_TO_TIMESPEC32(&rmtime32, &rmtime);
+ if (copyout(&rmtime32, rmtp, sizeof (rmtime32)))
+ return (set_errno(EFAULT));
+ }
+ }
+
+ if (ret == 0)
+ return (set_errno(EINTR));
+ return (0);
+}
+
+/*
+ * Routines to convert standard UNIX time (seconds since Jan 1, 1970)
+ * into year/month/day/hour/minute/second format, and back again.
+ * Note: these routines require tod_lock held to protect cached state.
+ */
+static int days_thru_month[64] = {
+ 0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0,
+ 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
+ 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
+ 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
+};
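+
+/*
+ * The table is indexed by ((year & 3) << 4) + month, where year counts
+ * from 1900; the row for year & 3 == 0 (e.g. 1972) describes leap
+ * years. For example, March of a leap year maps to entry 3 of that
+ * row: 60 days elapsed through the end of February.
+ */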
+
+todinfo_t saved_tod;
+int saved_utc = -60;
+
+todinfo_t
+utc_to_tod(time_t utc)
+{
+ long dse, day, month, year;
+ todinfo_t tod;
+
+ ASSERT(MUTEX_HELD(&tod_lock));
+
+ if (utc < 0) /* should never happen */
+ utc = 0;
+
+ saved_tod.tod_sec += utc - saved_utc;
+ saved_utc = utc;
+ if (saved_tod.tod_sec >= 0 && saved_tod.tod_sec < 60)
+ return (saved_tod); /* only the seconds changed */
+
+ dse = utc / 86400; /* days since epoch */
+
+ tod.tod_sec = utc % 60;
+ tod.tod_min = (utc % 3600) / 60;
+ tod.tod_hour = (utc % 86400) / 3600;
+ tod.tod_dow = (dse + 4) % 7 + 1; /* epoch was a Thursday */
+
+ year = dse / 365 + 72; /* first guess -- always a bit too large */
+ do {
+ year--;
+ day = dse - 365 * (year - 70) - ((year - 69) >> 2);
+ } while (day < 0);
+
+ month = ((year & 3) << 4) + 1;
+ while (day >= days_thru_month[month + 1])
+ month++;
+
+ tod.tod_day = day - days_thru_month[month] + 1;
+ tod.tod_month = month & 15;
+ tod.tod_year = year;
+
+ saved_tod = tod;
+ return (tod);
+}
+
+time_t
+tod_to_utc(todinfo_t tod)
+{
+ time_t utc;
+ int year = tod.tod_year;
+ int month = tod.tod_month + ((year & 3) << 4);
+#ifdef DEBUG
+ /* only warn once, not each time called */
+ static int year_warn = 1;
+ static int month_warn = 1;
+ static int day_warn = 1;
+ static int hour_warn = 1;
+ static int min_warn = 1;
+ static int sec_warn = 1;
+ int days_diff = days_thru_month[month + 1] - days_thru_month[month];
+#endif
+
+ ASSERT(MUTEX_HELD(&tod_lock));
+
+#ifdef DEBUG
+ if (year_warn && (year < 70 || year > 8029)) {
+ cmn_err(CE_WARN,
+ "The hardware real-time clock appears to have the "
+ "wrong years value %d -- time needs to be reset\n",
+ year);
+ year_warn = 0;
+ }
+
+ if (month_warn && (tod.tod_month < 1 || tod.tod_month > 12)) {
+ cmn_err(CE_WARN,
+ "The hardware real-time clock appears to have the "
+ "wrong months value %d -- time needs to be reset\n",
+ tod.tod_month);
+ month_warn = 0;
+ }
+
+ if (day_warn && (tod.tod_day < 1 || tod.tod_day > days_diff)) {
+ cmn_err(CE_WARN,
+ "The hardware real-time clock appears to have the "
+ "wrong days value %d -- time needs to be reset\n",
+ tod.tod_day);
+ day_warn = 0;
+ }
+
+ if (hour_warn && (tod.tod_hour < 0 || tod.tod_hour > 23)) {
+ cmn_err(CE_WARN,
+ "The hardware real-time clock appears to have the "
+ "wrong hours value %d -- time needs to be reset\n",
+ tod.tod_hour);
+ hour_warn = 0;
+ }
+
+ if (min_warn && (tod.tod_min < 0 || tod.tod_min > 59)) {
+ cmn_err(CE_WARN,
+ "The hardware real-time clock appears to have the "
+ "wrong minutes value %d -- time needs to be reset\n",
+ tod.tod_min);
+ min_warn = 0;
+ }
+
+ if (sec_warn && (tod.tod_sec < 0 || tod.tod_sec > 59)) {
+ cmn_err(CE_WARN,
+ "The hardware real-time clock appears to have the "
+ "wrong seconds value %d -- time needs to be reset\n",
+ tod.tod_sec);
+ sec_warn = 0;
+ }
+#endif
+
+ utc = (year - 70); /* next 3 lines: utc = 365y + y/4 */
+ utc += (utc << 3) + (utc << 6);
+ utc += (utc << 2) + ((year - 69) >> 2);
+ utc += days_thru_month[month] + tod.tod_day - 1;
+ utc = (utc << 3) + (utc << 4) + tod.tod_hour; /* 24 * day + hour */
+ utc = (utc << 6) - (utc << 2) + tod.tod_min; /* 60 * hour + min */
+ utc = (utc << 6) - (utc << 2) + tod.tod_sec; /* 60 * min + sec */
+
+ return (utc);
+}